xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
15ffd83dbSDimitry Andric //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric //
95ffd83dbSDimitry Andric // This pass does combining of machine instructions at the generic MI level,
105ffd83dbSDimitry Andric // after the legalizer.
115ffd83dbSDimitry Andric //
125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric 
14e8d8bef9SDimitry Andric #include "AMDGPU.h"
15349cc55cSDimitry Andric #include "AMDGPUCombinerHelper.h"
165ffd83dbSDimitry Andric #include "AMDGPULegalizerInfo.h"
17e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
18e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
195ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h"
205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
215ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
235ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
245ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
255ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
264824e7fdSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
27e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
285ffd83dbSDimitry Andric 
295ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
305ffd83dbSDimitry Andric 
315ffd83dbSDimitry Andric using namespace llvm;
325ffd83dbSDimitry Andric using namespace MIPatternMatch;
335ffd83dbSDimitry Andric 
34e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelper {
35e8d8bef9SDimitry Andric protected:
36e8d8bef9SDimitry Andric   MachineIRBuilder &B;
37e8d8bef9SDimitry Andric   MachineFunction &MF;
38e8d8bef9SDimitry Andric   MachineRegisterInfo &MRI;
39349cc55cSDimitry Andric   AMDGPUCombinerHelper &Helper;
40e8d8bef9SDimitry Andric 
41e8d8bef9SDimitry Andric public:
42349cc55cSDimitry Andric   AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
43349cc55cSDimitry Andric                                     AMDGPUCombinerHelper &Helper)
44e8d8bef9SDimitry Andric       : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
45e8d8bef9SDimitry Andric 
465ffd83dbSDimitry Andric   struct FMinFMaxLegacyInfo {
475ffd83dbSDimitry Andric     Register LHS;
485ffd83dbSDimitry Andric     Register RHS;
495ffd83dbSDimitry Andric     Register True;
505ffd83dbSDimitry Andric     Register False;
515ffd83dbSDimitry Andric     CmpInst::Predicate Pred;
525ffd83dbSDimitry Andric   };
535ffd83dbSDimitry Andric 
545ffd83dbSDimitry Andric   // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
55e8d8bef9SDimitry Andric   bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
56e8d8bef9SDimitry Andric   void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
57e8d8bef9SDimitry Andric                                          const FMinFMaxLegacyInfo &Info);
58e8d8bef9SDimitry Andric 
59e8d8bef9SDimitry Andric   bool matchUCharToFloat(MachineInstr &MI);
60e8d8bef9SDimitry Andric   void applyUCharToFloat(MachineInstr &MI);
61e8d8bef9SDimitry Andric 
624824e7fdSDimitry Andric   bool matchRcpSqrtToRsq(MachineInstr &MI,
634824e7fdSDimitry Andric                          std::function<void(MachineIRBuilder &)> &MatchInfo);
644824e7fdSDimitry Andric 
65e8d8bef9SDimitry Andric   // FIXME: Should be able to have 2 separate matchdatas rather than custom
66e8d8bef9SDimitry Andric   // struct boilerplate.
67e8d8bef9SDimitry Andric   struct CvtF32UByteMatchInfo {
68e8d8bef9SDimitry Andric     Register CvtVal;
69e8d8bef9SDimitry Andric     unsigned ShiftOffset;
70e8d8bef9SDimitry Andric   };
71e8d8bef9SDimitry Andric 
72e8d8bef9SDimitry Andric   bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
73e8d8bef9SDimitry Andric   void applyCvtF32UByteN(MachineInstr &MI,
74e8d8bef9SDimitry Andric                          const CvtF32UByteMatchInfo &MatchInfo);
75fe6060f1SDimitry Andric 
76fe6060f1SDimitry Andric   bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);
77e8d8bef9SDimitry Andric };
78e8d8bef9SDimitry Andric 
79e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
80e8d8bef9SDimitry Andric     MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
815ffd83dbSDimitry Andric   // FIXME: Type predicate on pattern
825ffd83dbSDimitry Andric   if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
835ffd83dbSDimitry Andric     return false;
845ffd83dbSDimitry Andric 
855ffd83dbSDimitry Andric   Register Cond = MI.getOperand(1).getReg();
865ffd83dbSDimitry Andric   if (!MRI.hasOneNonDBGUse(Cond) ||
875ffd83dbSDimitry Andric       !mi_match(Cond, MRI,
885ffd83dbSDimitry Andric                 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
895ffd83dbSDimitry Andric     return false;
905ffd83dbSDimitry Andric 
915ffd83dbSDimitry Andric   Info.True = MI.getOperand(2).getReg();
925ffd83dbSDimitry Andric   Info.False = MI.getOperand(3).getReg();
935ffd83dbSDimitry Andric 
945ffd83dbSDimitry Andric   if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
955ffd83dbSDimitry Andric       !(Info.LHS == Info.False && Info.RHS == Info.True))
965ffd83dbSDimitry Andric     return false;
975ffd83dbSDimitry Andric 
985ffd83dbSDimitry Andric   switch (Info.Pred) {
995ffd83dbSDimitry Andric   case CmpInst::FCMP_FALSE:
1005ffd83dbSDimitry Andric   case CmpInst::FCMP_OEQ:
1015ffd83dbSDimitry Andric   case CmpInst::FCMP_ONE:
1025ffd83dbSDimitry Andric   case CmpInst::FCMP_ORD:
1035ffd83dbSDimitry Andric   case CmpInst::FCMP_UNO:
1045ffd83dbSDimitry Andric   case CmpInst::FCMP_UEQ:
1055ffd83dbSDimitry Andric   case CmpInst::FCMP_UNE:
1065ffd83dbSDimitry Andric   case CmpInst::FCMP_TRUE:
1075ffd83dbSDimitry Andric     return false;
1085ffd83dbSDimitry Andric   default:
1095ffd83dbSDimitry Andric     return true;
1105ffd83dbSDimitry Andric   }
1115ffd83dbSDimitry Andric }
1125ffd83dbSDimitry Andric 
113e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
114e8d8bef9SDimitry Andric     MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
115e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
116e8d8bef9SDimitry Andric   auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
117e8d8bef9SDimitry Andric     B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
1185ffd83dbSDimitry Andric   };
1195ffd83dbSDimitry Andric 
1205ffd83dbSDimitry Andric   switch (Info.Pred) {
1215ffd83dbSDimitry Andric   case CmpInst::FCMP_ULT:
1225ffd83dbSDimitry Andric   case CmpInst::FCMP_ULE:
1235ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1245ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
1255ffd83dbSDimitry Andric     else
1265ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
1275ffd83dbSDimitry Andric     break;
1285ffd83dbSDimitry Andric   case CmpInst::FCMP_OLE:
1295ffd83dbSDimitry Andric   case CmpInst::FCMP_OLT: {
1305ffd83dbSDimitry Andric     // We need to permute the operands to get the correct NaN behavior. The
1315ffd83dbSDimitry Andric     // selected operand is the second one based on the failing compare with NaN,
1325ffd83dbSDimitry Andric     // so permute it based on the compare type the hardware uses.
1335ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1345ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
1355ffd83dbSDimitry Andric     else
1365ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
1375ffd83dbSDimitry Andric     break;
1385ffd83dbSDimitry Andric   }
1395ffd83dbSDimitry Andric   case CmpInst::FCMP_UGE:
1405ffd83dbSDimitry Andric   case CmpInst::FCMP_UGT: {
1415ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1425ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
1435ffd83dbSDimitry Andric     else
1445ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
1455ffd83dbSDimitry Andric     break;
1465ffd83dbSDimitry Andric   }
1475ffd83dbSDimitry Andric   case CmpInst::FCMP_OGT:
1485ffd83dbSDimitry Andric   case CmpInst::FCMP_OGE: {
1495ffd83dbSDimitry Andric     if (Info.LHS == Info.True)
1505ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
1515ffd83dbSDimitry Andric     else
1525ffd83dbSDimitry Andric       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
1535ffd83dbSDimitry Andric     break;
1545ffd83dbSDimitry Andric   }
1555ffd83dbSDimitry Andric   default:
1565ffd83dbSDimitry Andric     llvm_unreachable("predicate should not have matched");
1575ffd83dbSDimitry Andric   }
1585ffd83dbSDimitry Andric 
1595ffd83dbSDimitry Andric   MI.eraseFromParent();
1605ffd83dbSDimitry Andric }
1615ffd83dbSDimitry Andric 
162e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
1635ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1645ffd83dbSDimitry Andric 
1655ffd83dbSDimitry Andric   // TODO: We could try to match extracting the higher bytes, which would be
1665ffd83dbSDimitry Andric   // easier if i8 vectors weren't promoted to i32 vectors, particularly after
1675ffd83dbSDimitry Andric   // types are legalized. v4i8 -> v4f32 is probably the only case to worry
1685ffd83dbSDimitry Andric   // about in practice.
1695ffd83dbSDimitry Andric   LLT Ty = MRI.getType(DstReg);
1705ffd83dbSDimitry Andric   if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
1715ffd83dbSDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
1725ffd83dbSDimitry Andric     unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
1735ffd83dbSDimitry Andric     assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
1745ffd83dbSDimitry Andric     const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
1755ffd83dbSDimitry Andric     return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
1765ffd83dbSDimitry Andric   }
1775ffd83dbSDimitry Andric 
1785ffd83dbSDimitry Andric   return false;
1795ffd83dbSDimitry Andric }
1805ffd83dbSDimitry Andric 
181e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
182e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
1835ffd83dbSDimitry Andric 
1845ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
1855ffd83dbSDimitry Andric 
1865ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1875ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
188e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(DstReg);
189e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
1905ffd83dbSDimitry Andric   if (SrcTy != S32)
1915ffd83dbSDimitry Andric     SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
1925ffd83dbSDimitry Andric 
1935ffd83dbSDimitry Andric   if (Ty == S32) {
1945ffd83dbSDimitry Andric     B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
1955ffd83dbSDimitry Andric                    {SrcReg}, MI.getFlags());
1965ffd83dbSDimitry Andric   } else {
1975ffd83dbSDimitry Andric     auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
1985ffd83dbSDimitry Andric                              {SrcReg}, MI.getFlags());
1995ffd83dbSDimitry Andric     B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
2005ffd83dbSDimitry Andric   }
2015ffd83dbSDimitry Andric 
2025ffd83dbSDimitry Andric   MI.eraseFromParent();
2035ffd83dbSDimitry Andric }
2045ffd83dbSDimitry Andric 
2054824e7fdSDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
2064824e7fdSDimitry Andric     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2074824e7fdSDimitry Andric 
2084824e7fdSDimitry Andric   auto getRcpSrc = [=](const MachineInstr &MI) {
2094824e7fdSDimitry Andric     MachineInstr *ResMI = nullptr;
2104824e7fdSDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
2114824e7fdSDimitry Andric         MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
2124824e7fdSDimitry Andric       ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
2134824e7fdSDimitry Andric 
2144824e7fdSDimitry Andric     return ResMI;
2154824e7fdSDimitry Andric   };
2164824e7fdSDimitry Andric 
2174824e7fdSDimitry Andric   auto getSqrtSrc = [=](const MachineInstr &MI) {
2184824e7fdSDimitry Andric     MachineInstr *SqrtSrcMI = nullptr;
219*bdd1243dSDimitry Andric     auto Match =
2204824e7fdSDimitry Andric         mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
221*bdd1243dSDimitry Andric     (void)Match;
2224824e7fdSDimitry Andric     return SqrtSrcMI;
2234824e7fdSDimitry Andric   };
2244824e7fdSDimitry Andric 
2254824e7fdSDimitry Andric   MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
2264824e7fdSDimitry Andric   // rcp(sqrt(x))
2274824e7fdSDimitry Andric   if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
2284824e7fdSDimitry Andric     MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
2294824e7fdSDimitry Andric       B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
2304824e7fdSDimitry Andric           .addUse(SqrtSrcMI->getOperand(0).getReg())
2314824e7fdSDimitry Andric           .setMIFlags(MI.getFlags());
2324824e7fdSDimitry Andric     };
2334824e7fdSDimitry Andric     return true;
2344824e7fdSDimitry Andric   }
2354824e7fdSDimitry Andric 
2364824e7fdSDimitry Andric   // sqrt(rcp(x))
2374824e7fdSDimitry Andric   if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
2384824e7fdSDimitry Andric     MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
2394824e7fdSDimitry Andric       B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
2404824e7fdSDimitry Andric           .addUse(RcpSrcMI->getOperand(0).getReg())
2414824e7fdSDimitry Andric           .setMIFlags(MI.getFlags());
2424824e7fdSDimitry Andric     };
2434824e7fdSDimitry Andric     return true;
2444824e7fdSDimitry Andric   }
2454824e7fdSDimitry Andric 
2464824e7fdSDimitry Andric   return false;
2474824e7fdSDimitry Andric }
2484824e7fdSDimitry Andric 
249e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
250e8d8bef9SDimitry Andric     MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
2515ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
2525ffd83dbSDimitry Andric 
2535ffd83dbSDimitry Andric   // Look through G_ZEXT.
254*bdd1243dSDimitry Andric   bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
2555ffd83dbSDimitry Andric 
2565ffd83dbSDimitry Andric   Register Src0;
2575ffd83dbSDimitry Andric   int64_t ShiftAmt;
258*bdd1243dSDimitry Andric   IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
2595ffd83dbSDimitry Andric   if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
2605ffd83dbSDimitry Andric     const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
2615ffd83dbSDimitry Andric 
2625ffd83dbSDimitry Andric     unsigned ShiftOffset = 8 * Offset;
2635ffd83dbSDimitry Andric     if (IsShr)
2645ffd83dbSDimitry Andric       ShiftOffset += ShiftAmt;
2655ffd83dbSDimitry Andric     else
2665ffd83dbSDimitry Andric       ShiftOffset -= ShiftAmt;
2675ffd83dbSDimitry Andric 
2685ffd83dbSDimitry Andric     MatchInfo.CvtVal = Src0;
2695ffd83dbSDimitry Andric     MatchInfo.ShiftOffset = ShiftOffset;
2705ffd83dbSDimitry Andric     return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
2715ffd83dbSDimitry Andric   }
2725ffd83dbSDimitry Andric 
2735ffd83dbSDimitry Andric   // TODO: Simplify demanded bits.
2745ffd83dbSDimitry Andric   return false;
2755ffd83dbSDimitry Andric }
2765ffd83dbSDimitry Andric 
277e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
278e8d8bef9SDimitry Andric     MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
279e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
2805ffd83dbSDimitry Andric   unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
2815ffd83dbSDimitry Andric 
2825ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
2835ffd83dbSDimitry Andric   Register CvtSrc = MatchInfo.CvtVal;
284e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
2855ffd83dbSDimitry Andric   if (SrcTy != S32) {
2865ffd83dbSDimitry Andric     assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
2875ffd83dbSDimitry Andric     CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
2885ffd83dbSDimitry Andric   }
2895ffd83dbSDimitry Andric 
2905ffd83dbSDimitry Andric   assert(MI.getOpcode() != NewOpc);
2915ffd83dbSDimitry Andric   B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
2925ffd83dbSDimitry Andric   MI.eraseFromParent();
2935ffd83dbSDimitry Andric }
2945ffd83dbSDimitry Andric 
295fe6060f1SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
296fe6060f1SDimitry Andric     MachineInstr &MI, Register &Reg) {
297fe6060f1SDimitry Andric   const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
298fe6060f1SDimitry Andric       MF.getSubtarget().getTargetLowering());
299fe6060f1SDimitry Andric   Reg = MI.getOperand(1).getReg();
300fe6060f1SDimitry Andric   return TLI->isCanonicalized(Reg, MF);
301fe6060f1SDimitry Andric }
302fe6060f1SDimitry Andric 
303e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelperState {
304e8d8bef9SDimitry Andric protected:
305349cc55cSDimitry Andric   AMDGPUCombinerHelper &Helper;
306e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
307e8d8bef9SDimitry Andric 
308*bdd1243dSDimitry Andric   // Note: pointer is necessary because Target Predicates use
309*bdd1243dSDimitry Andric   //   "Subtarget->"
310*bdd1243dSDimitry Andric   const GCNSubtarget *Subtarget;
311*bdd1243dSDimitry Andric 
312e8d8bef9SDimitry Andric public:
313e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelperState(
314349cc55cSDimitry Andric       AMDGPUCombinerHelper &Helper,
315*bdd1243dSDimitry Andric       AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper,
316*bdd1243dSDimitry Andric       const GCNSubtarget &Subtarget)
317*bdd1243dSDimitry Andric       : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper),
318*bdd1243dSDimitry Andric         Subtarget(&Subtarget) {}
319e8d8bef9SDimitry Andric };
320e8d8bef9SDimitry Andric 
3215ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
3225ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
3235ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
3245ffd83dbSDimitry Andric 
3255ffd83dbSDimitry Andric namespace {
3265ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
3275ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
3285ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
3295ffd83dbSDimitry Andric 
330e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
3315ffd83dbSDimitry Andric   GISelKnownBits *KB;
3325ffd83dbSDimitry Andric   MachineDominatorTree *MDT;
333*bdd1243dSDimitry Andric   const GCNSubtarget &Subtarget;
3345ffd83dbSDimitry Andric 
3355ffd83dbSDimitry Andric public:
3365ffd83dbSDimitry Andric   AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
3375ffd83dbSDimitry Andric 
338*bdd1243dSDimitry Andric   AMDGPUPostLegalizerCombinerInfo(const GCNSubtarget &Subtarget, bool EnableOpt,
339*bdd1243dSDimitry Andric                                   bool OptSize, bool MinSize,
3405ffd83dbSDimitry Andric                                   const AMDGPULegalizerInfo *LI,
3415ffd83dbSDimitry Andric                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
3425ffd83dbSDimitry Andric       : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
3435ffd83dbSDimitry Andric                      /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
344*bdd1243dSDimitry Andric         KB(KB), MDT(MDT), Subtarget(Subtarget) {
3455ffd83dbSDimitry Andric     if (!GeneratedRuleCfg.parseCommandLineOption())
3465ffd83dbSDimitry Andric       report_fatal_error("Invalid rule identifier");
3475ffd83dbSDimitry Andric   }
3485ffd83dbSDimitry Andric 
3495ffd83dbSDimitry Andric   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
3505ffd83dbSDimitry Andric                MachineIRBuilder &B) const override;
3515ffd83dbSDimitry Andric };
3525ffd83dbSDimitry Andric 
3535ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
3545ffd83dbSDimitry Andric                                               MachineInstr &MI,
3555ffd83dbSDimitry Andric                                               MachineIRBuilder &B) const {
356*bdd1243dSDimitry Andric   AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT,
357*bdd1243dSDimitry Andric                               LInfo);
358e8d8bef9SDimitry Andric   AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
359*bdd1243dSDimitry Andric   AMDGPUGenPostLegalizerCombinerHelper Generated(
360*bdd1243dSDimitry Andric       GeneratedRuleCfg, Helper, PostLegalizerHelper, Subtarget);
3615ffd83dbSDimitry Andric 
362e8d8bef9SDimitry Andric   if (Generated.tryCombineAll(Observer, MI, B))
3635ffd83dbSDimitry Andric     return true;
3645ffd83dbSDimitry Andric 
3655ffd83dbSDimitry Andric   switch (MI.getOpcode()) {
3665ffd83dbSDimitry Andric   case TargetOpcode::G_SHL:
3675ffd83dbSDimitry Andric   case TargetOpcode::G_LSHR:
3685ffd83dbSDimitry Andric   case TargetOpcode::G_ASHR:
3695ffd83dbSDimitry Andric     // On some subtargets, 64-bit shift is a quarter rate instruction. In the
3705ffd83dbSDimitry Andric     // common case, splitting this into a move and a 32-bit shift is faster and
3715ffd83dbSDimitry Andric     // the same code size.
3725ffd83dbSDimitry Andric     return Helper.tryCombineShiftToUnmerge(MI, 32);
3735ffd83dbSDimitry Andric   }
3745ffd83dbSDimitry Andric 
3755ffd83dbSDimitry Andric   return false;
3765ffd83dbSDimitry Andric }
3775ffd83dbSDimitry Andric 
3785ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
3795ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc"
3805ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
3815ffd83dbSDimitry Andric 
3825ffd83dbSDimitry Andric // Pass boilerplate
3835ffd83dbSDimitry Andric // ================
3845ffd83dbSDimitry Andric 
3855ffd83dbSDimitry Andric class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
3865ffd83dbSDimitry Andric public:
3875ffd83dbSDimitry Andric   static char ID;
3885ffd83dbSDimitry Andric 
3895ffd83dbSDimitry Andric   AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
3905ffd83dbSDimitry Andric 
3915ffd83dbSDimitry Andric   StringRef getPassName() const override {
3925ffd83dbSDimitry Andric     return "AMDGPUPostLegalizerCombiner";
3935ffd83dbSDimitry Andric   }
3945ffd83dbSDimitry Andric 
3955ffd83dbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
3965ffd83dbSDimitry Andric 
3975ffd83dbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
3985ffd83dbSDimitry Andric private:
3995ffd83dbSDimitry Andric   bool IsOptNone;
4005ffd83dbSDimitry Andric };
4015ffd83dbSDimitry Andric } // end anonymous namespace
4025ffd83dbSDimitry Andric 
4035ffd83dbSDimitry Andric void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
4045ffd83dbSDimitry Andric   AU.addRequired<TargetPassConfig>();
4055ffd83dbSDimitry Andric   AU.setPreservesCFG();
4065ffd83dbSDimitry Andric   getSelectionDAGFallbackAnalysisUsage(AU);
4075ffd83dbSDimitry Andric   AU.addRequired<GISelKnownBitsAnalysis>();
4085ffd83dbSDimitry Andric   AU.addPreserved<GISelKnownBitsAnalysis>();
4095ffd83dbSDimitry Andric   if (!IsOptNone) {
4105ffd83dbSDimitry Andric     AU.addRequired<MachineDominatorTree>();
4115ffd83dbSDimitry Andric     AU.addPreserved<MachineDominatorTree>();
4125ffd83dbSDimitry Andric   }
4135ffd83dbSDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
4145ffd83dbSDimitry Andric }
4155ffd83dbSDimitry Andric 
4165ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
4175ffd83dbSDimitry Andric   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
4185ffd83dbSDimitry Andric   initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
4195ffd83dbSDimitry Andric }
4205ffd83dbSDimitry Andric 
4215ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
4225ffd83dbSDimitry Andric   if (MF.getProperties().hasProperty(
4235ffd83dbSDimitry Andric           MachineFunctionProperties::Property::FailedISel))
4245ffd83dbSDimitry Andric     return false;
4255ffd83dbSDimitry Andric   auto *TPC = &getAnalysis<TargetPassConfig>();
4265ffd83dbSDimitry Andric   const Function &F = MF.getFunction();
4275ffd83dbSDimitry Andric   bool EnableOpt =
4285ffd83dbSDimitry Andric       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
4295ffd83dbSDimitry Andric 
4305ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
4315ffd83dbSDimitry Andric   const AMDGPULegalizerInfo *LI
4325ffd83dbSDimitry Andric     = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
4335ffd83dbSDimitry Andric 
4345ffd83dbSDimitry Andric   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
4355ffd83dbSDimitry Andric   MachineDominatorTree *MDT =
4365ffd83dbSDimitry Andric       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
437*bdd1243dSDimitry Andric   AMDGPUPostLegalizerCombinerInfo PCInfo(ST, EnableOpt, F.hasOptSize(),
4385ffd83dbSDimitry Andric                                          F.hasMinSize(), LI, KB, MDT);
4395ffd83dbSDimitry Andric   Combiner C(PCInfo, TPC);
4405ffd83dbSDimitry Andric   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
4415ffd83dbSDimitry Andric }
4425ffd83dbSDimitry Andric 
4435ffd83dbSDimitry Andric char AMDGPUPostLegalizerCombiner::ID = 0;
4445ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
4455ffd83dbSDimitry Andric                       "Combine AMDGPU machine instrs after legalization",
4465ffd83dbSDimitry Andric                       false, false)
4475ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
4485ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
4495ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
4505ffd83dbSDimitry Andric                     "Combine AMDGPU machine instrs after legalization", false,
4515ffd83dbSDimitry Andric                     false)
4525ffd83dbSDimitry Andric 
4535ffd83dbSDimitry Andric namespace llvm {
4545ffd83dbSDimitry Andric FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
4555ffd83dbSDimitry Andric   return new AMDGPUPostLegalizerCombiner(IsOptNone);
4565ffd83dbSDimitry Andric }
4575ffd83dbSDimitry Andric } // end namespace llvm
458