15ffd83dbSDimitry Andric //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric // This pass does combining of machine instructions at the generic MI level, 105ffd83dbSDimitry Andric // after the legalizer. 115ffd83dbSDimitry Andric // 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 14e8d8bef9SDimitry Andric #include "AMDGPU.h" 15349cc55cSDimitry Andric #include "AMDGPUCombinerHelper.h" 165ffd83dbSDimitry Andric #include "AMDGPULegalizerInfo.h" 17e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 18e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 195ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h" 205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 215ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 235ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 245ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 255ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 264824e7fdSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 27e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 285ffd83dbSDimitry Andric 295ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-postlegalizer-combiner" 305ffd83dbSDimitry Andric 315ffd83dbSDimitry Andric using namespace llvm; 325ffd83dbSDimitry Andric using namespace MIPatternMatch; 335ffd83dbSDimitry Andric 34e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelper { 35e8d8bef9SDimitry Andric protected: 36e8d8bef9SDimitry Andric MachineIRBuilder &B; 37e8d8bef9SDimitry Andric MachineFunction &MF; 38e8d8bef9SDimitry Andric MachineRegisterInfo &MRI; 39349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper; 40e8d8bef9SDimitry Andric 41e8d8bef9SDimitry Andric public: 42349cc55cSDimitry Andric AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, 43349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper) 44e8d8bef9SDimitry Andric : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){}; 45e8d8bef9SDimitry Andric 465ffd83dbSDimitry Andric struct FMinFMaxLegacyInfo { 475ffd83dbSDimitry Andric Register LHS; 485ffd83dbSDimitry Andric Register RHS; 495ffd83dbSDimitry Andric Register True; 505ffd83dbSDimitry Andric Register False; 515ffd83dbSDimitry Andric CmpInst::Predicate Pred; 525ffd83dbSDimitry Andric }; 535ffd83dbSDimitry Andric 545ffd83dbSDimitry Andric // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize 55e8d8bef9SDimitry Andric bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info); 56e8d8bef9SDimitry Andric void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, 57e8d8bef9SDimitry Andric const FMinFMaxLegacyInfo &Info); 58e8d8bef9SDimitry Andric 59e8d8bef9SDimitry Andric bool matchUCharToFloat(MachineInstr &MI); 60e8d8bef9SDimitry Andric void applyUCharToFloat(MachineInstr &MI); 61e8d8bef9SDimitry Andric 624824e7fdSDimitry Andric bool matchRcpSqrtToRsq(MachineInstr &MI, 634824e7fdSDimitry Andric std::function<void(MachineIRBuilder &)> &MatchInfo); 644824e7fdSDimitry Andric 65e8d8bef9SDimitry Andric // FIXME: Should be able to have 2 separate matchdatas rather than custom 66e8d8bef9SDimitry Andric // struct boilerplate. 67e8d8bef9SDimitry Andric struct CvtF32UByteMatchInfo { 68e8d8bef9SDimitry Andric Register CvtVal; 69e8d8bef9SDimitry Andric unsigned ShiftOffset; 70e8d8bef9SDimitry Andric }; 71e8d8bef9SDimitry Andric 72e8d8bef9SDimitry Andric bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo); 73e8d8bef9SDimitry Andric void applyCvtF32UByteN(MachineInstr &MI, 74e8d8bef9SDimitry Andric const CvtF32UByteMatchInfo &MatchInfo); 75fe6060f1SDimitry Andric 76fe6060f1SDimitry Andric bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg); 77e8d8bef9SDimitry Andric }; 78e8d8bef9SDimitry Andric 79e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy( 80e8d8bef9SDimitry Andric MachineInstr &MI, FMinFMaxLegacyInfo &Info) { 815ffd83dbSDimitry Andric // FIXME: Type predicate on pattern 825ffd83dbSDimitry Andric if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32)) 835ffd83dbSDimitry Andric return false; 845ffd83dbSDimitry Andric 855ffd83dbSDimitry Andric Register Cond = MI.getOperand(1).getReg(); 865ffd83dbSDimitry Andric if (!MRI.hasOneNonDBGUse(Cond) || 875ffd83dbSDimitry Andric !mi_match(Cond, MRI, 885ffd83dbSDimitry Andric m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS)))) 895ffd83dbSDimitry Andric return false; 905ffd83dbSDimitry Andric 915ffd83dbSDimitry Andric Info.True = MI.getOperand(2).getReg(); 925ffd83dbSDimitry Andric Info.False = MI.getOperand(3).getReg(); 935ffd83dbSDimitry Andric 945ffd83dbSDimitry Andric if (!(Info.LHS == Info.True && Info.RHS == Info.False) && 955ffd83dbSDimitry Andric !(Info.LHS == Info.False && Info.RHS == Info.True)) 965ffd83dbSDimitry Andric return false; 975ffd83dbSDimitry Andric 985ffd83dbSDimitry Andric switch (Info.Pred) { 995ffd83dbSDimitry Andric case CmpInst::FCMP_FALSE: 1005ffd83dbSDimitry Andric case CmpInst::FCMP_OEQ: 1015ffd83dbSDimitry Andric case CmpInst::FCMP_ONE: 1025ffd83dbSDimitry Andric case CmpInst::FCMP_ORD: 1035ffd83dbSDimitry Andric case CmpInst::FCMP_UNO: 1045ffd83dbSDimitry Andric case CmpInst::FCMP_UEQ: 1055ffd83dbSDimitry Andric case CmpInst::FCMP_UNE: 1065ffd83dbSDimitry Andric case CmpInst::FCMP_TRUE: 1075ffd83dbSDimitry Andric return false; 1085ffd83dbSDimitry Andric default: 1095ffd83dbSDimitry Andric return true; 1105ffd83dbSDimitry Andric } 1115ffd83dbSDimitry Andric } 1125ffd83dbSDimitry Andric 113e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy( 114e8d8bef9SDimitry Andric MachineInstr &MI, const FMinFMaxLegacyInfo &Info) { 115e8d8bef9SDimitry Andric B.setInstrAndDebugLoc(MI); 116e8d8bef9SDimitry Andric auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) { 117e8d8bef9SDimitry Andric B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags()); 1185ffd83dbSDimitry Andric }; 1195ffd83dbSDimitry Andric 1205ffd83dbSDimitry Andric switch (Info.Pred) { 1215ffd83dbSDimitry Andric case CmpInst::FCMP_ULT: 1225ffd83dbSDimitry Andric case CmpInst::FCMP_ULE: 1235ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1245ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); 1255ffd83dbSDimitry Andric else 1265ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); 1275ffd83dbSDimitry Andric break; 1285ffd83dbSDimitry Andric case CmpInst::FCMP_OLE: 1295ffd83dbSDimitry Andric case CmpInst::FCMP_OLT: { 1305ffd83dbSDimitry Andric // We need to permute the operands to get the correct NaN behavior. The 1315ffd83dbSDimitry Andric // selected operand is the second one based on the failing compare with NaN, 1325ffd83dbSDimitry Andric // so permute it based on the compare type the hardware uses. 1335ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1345ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); 1355ffd83dbSDimitry Andric else 1365ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); 1375ffd83dbSDimitry Andric break; 1385ffd83dbSDimitry Andric } 1395ffd83dbSDimitry Andric case CmpInst::FCMP_UGE: 1405ffd83dbSDimitry Andric case CmpInst::FCMP_UGT: { 1415ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1425ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); 1435ffd83dbSDimitry Andric else 1445ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); 1455ffd83dbSDimitry Andric break; 1465ffd83dbSDimitry Andric } 1475ffd83dbSDimitry Andric case CmpInst::FCMP_OGT: 1485ffd83dbSDimitry Andric case CmpInst::FCMP_OGE: { 1495ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1505ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); 1515ffd83dbSDimitry Andric else 1525ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); 1535ffd83dbSDimitry Andric break; 1545ffd83dbSDimitry Andric } 1555ffd83dbSDimitry Andric default: 1565ffd83dbSDimitry Andric llvm_unreachable("predicate should not have matched"); 1575ffd83dbSDimitry Andric } 1585ffd83dbSDimitry Andric 1595ffd83dbSDimitry Andric MI.eraseFromParent(); 1605ffd83dbSDimitry Andric } 1615ffd83dbSDimitry Andric 162e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) { 1635ffd83dbSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1645ffd83dbSDimitry Andric 1655ffd83dbSDimitry Andric // TODO: We could try to match extracting the higher bytes, which would be 1665ffd83dbSDimitry Andric // easier if i8 vectors weren't promoted to i32 vectors, particularly after 1675ffd83dbSDimitry Andric // types are legalized. v4i8 -> v4f32 is probably the only case to worry 1685ffd83dbSDimitry Andric // about in practice. 1695ffd83dbSDimitry Andric LLT Ty = MRI.getType(DstReg); 1705ffd83dbSDimitry Andric if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) { 1715ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1725ffd83dbSDimitry Andric unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits(); 1735ffd83dbSDimitry Andric assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64); 1745ffd83dbSDimitry Andric const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8); 1755ffd83dbSDimitry Andric return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask); 1765ffd83dbSDimitry Andric } 1775ffd83dbSDimitry Andric 1785ffd83dbSDimitry Andric return false; 1795ffd83dbSDimitry Andric } 1805ffd83dbSDimitry Andric 181e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) { 182e8d8bef9SDimitry Andric B.setInstrAndDebugLoc(MI); 1835ffd83dbSDimitry Andric 1845ffd83dbSDimitry Andric const LLT S32 = LLT::scalar(32); 1855ffd83dbSDimitry Andric 1865ffd83dbSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1875ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 188e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 189e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 1905ffd83dbSDimitry Andric if (SrcTy != S32) 1915ffd83dbSDimitry Andric SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0); 1925ffd83dbSDimitry Andric 1935ffd83dbSDimitry Andric if (Ty == S32) { 1945ffd83dbSDimitry Andric B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, 1955ffd83dbSDimitry Andric {SrcReg}, MI.getFlags()); 1965ffd83dbSDimitry Andric } else { 1975ffd83dbSDimitry Andric auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, 1985ffd83dbSDimitry Andric {SrcReg}, MI.getFlags()); 1995ffd83dbSDimitry Andric B.buildFPTrunc(DstReg, Cvt0, MI.getFlags()); 2005ffd83dbSDimitry Andric } 2015ffd83dbSDimitry Andric 2025ffd83dbSDimitry Andric MI.eraseFromParent(); 2035ffd83dbSDimitry Andric } 2045ffd83dbSDimitry Andric 2054824e7fdSDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq( 2064824e7fdSDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 2074824e7fdSDimitry Andric 2084824e7fdSDimitry Andric auto getRcpSrc = [=](const MachineInstr &MI) { 2094824e7fdSDimitry Andric MachineInstr *ResMI = nullptr; 2104824e7fdSDimitry Andric if (MI.getOpcode() == TargetOpcode::G_INTRINSIC && 2114824e7fdSDimitry Andric MI.getIntrinsicID() == Intrinsic::amdgcn_rcp) 2124824e7fdSDimitry Andric ResMI = MRI.getVRegDef(MI.getOperand(2).getReg()); 2134824e7fdSDimitry Andric 2144824e7fdSDimitry Andric return ResMI; 2154824e7fdSDimitry Andric }; 2164824e7fdSDimitry Andric 2174824e7fdSDimitry Andric auto getSqrtSrc = [=](const MachineInstr &MI) { 2184824e7fdSDimitry Andric MachineInstr *SqrtSrcMI = nullptr; 219*bdd1243dSDimitry Andric auto Match = 2204824e7fdSDimitry Andric mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI))); 221*bdd1243dSDimitry Andric (void)Match; 2224824e7fdSDimitry Andric return SqrtSrcMI; 2234824e7fdSDimitry Andric }; 2244824e7fdSDimitry Andric 2254824e7fdSDimitry Andric MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr; 2264824e7fdSDimitry Andric // rcp(sqrt(x)) 2274824e7fdSDimitry Andric if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) { 2284824e7fdSDimitry Andric MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) { 2294824e7fdSDimitry Andric B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false) 2304824e7fdSDimitry Andric .addUse(SqrtSrcMI->getOperand(0).getReg()) 2314824e7fdSDimitry Andric .setMIFlags(MI.getFlags()); 2324824e7fdSDimitry Andric }; 2334824e7fdSDimitry Andric return true; 2344824e7fdSDimitry Andric } 2354824e7fdSDimitry Andric 2364824e7fdSDimitry Andric // sqrt(rcp(x)) 2374824e7fdSDimitry Andric if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) { 2384824e7fdSDimitry Andric MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) { 2394824e7fdSDimitry Andric B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false) 2404824e7fdSDimitry Andric .addUse(RcpSrcMI->getOperand(0).getReg()) 2414824e7fdSDimitry Andric .setMIFlags(MI.getFlags()); 2424824e7fdSDimitry Andric }; 2434824e7fdSDimitry Andric return true; 2444824e7fdSDimitry Andric } 2454824e7fdSDimitry Andric 2464824e7fdSDimitry Andric return false; 2474824e7fdSDimitry Andric } 2484824e7fdSDimitry Andric 249e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN( 250e8d8bef9SDimitry Andric MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) { 2515ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2525ffd83dbSDimitry Andric 2535ffd83dbSDimitry Andric // Look through G_ZEXT. 254*bdd1243dSDimitry Andric bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg))); 2555ffd83dbSDimitry Andric 2565ffd83dbSDimitry Andric Register Src0; 2575ffd83dbSDimitry Andric int64_t ShiftAmt; 258*bdd1243dSDimitry Andric IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt))); 2595ffd83dbSDimitry Andric if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) { 2605ffd83dbSDimitry Andric const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0; 2615ffd83dbSDimitry Andric 2625ffd83dbSDimitry Andric unsigned ShiftOffset = 8 * Offset; 2635ffd83dbSDimitry Andric if (IsShr) 2645ffd83dbSDimitry Andric ShiftOffset += ShiftAmt; 2655ffd83dbSDimitry Andric else 2665ffd83dbSDimitry Andric ShiftOffset -= ShiftAmt; 2675ffd83dbSDimitry Andric 2685ffd83dbSDimitry Andric MatchInfo.CvtVal = Src0; 2695ffd83dbSDimitry Andric MatchInfo.ShiftOffset = ShiftOffset; 2705ffd83dbSDimitry Andric return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0; 2715ffd83dbSDimitry Andric } 2725ffd83dbSDimitry Andric 2735ffd83dbSDimitry Andric // TODO: Simplify demanded bits. 2745ffd83dbSDimitry Andric return false; 2755ffd83dbSDimitry Andric } 2765ffd83dbSDimitry Andric 277e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN( 278e8d8bef9SDimitry Andric MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) { 279e8d8bef9SDimitry Andric B.setInstrAndDebugLoc(MI); 2805ffd83dbSDimitry Andric unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8; 2815ffd83dbSDimitry Andric 2825ffd83dbSDimitry Andric const LLT S32 = LLT::scalar(32); 2835ffd83dbSDimitry Andric Register CvtSrc = MatchInfo.CvtVal; 284e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(MatchInfo.CvtVal); 2855ffd83dbSDimitry Andric if (SrcTy != S32) { 2865ffd83dbSDimitry Andric assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8); 2875ffd83dbSDimitry Andric CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0); 2885ffd83dbSDimitry Andric } 2895ffd83dbSDimitry Andric 2905ffd83dbSDimitry Andric assert(MI.getOpcode() != NewOpc); 2915ffd83dbSDimitry Andric B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags()); 2925ffd83dbSDimitry Andric MI.eraseFromParent(); 2935ffd83dbSDimitry Andric } 2945ffd83dbSDimitry Andric 295fe6060f1SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize( 296fe6060f1SDimitry Andric MachineInstr &MI, Register &Reg) { 297fe6060f1SDimitry Andric const SITargetLowering *TLI = static_cast<const SITargetLowering *>( 298fe6060f1SDimitry Andric MF.getSubtarget().getTargetLowering()); 299fe6060f1SDimitry Andric Reg = MI.getOperand(1).getReg(); 300fe6060f1SDimitry Andric return TLI->isCanonicalized(Reg, MF); 301fe6060f1SDimitry Andric } 302fe6060f1SDimitry Andric 303e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelperState { 304e8d8bef9SDimitry Andric protected: 305349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper; 306e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper; 307e8d8bef9SDimitry Andric 308*bdd1243dSDimitry Andric // Note: pointer is necessary because Target Predicates use 309*bdd1243dSDimitry Andric // "Subtarget->" 310*bdd1243dSDimitry Andric const GCNSubtarget *Subtarget; 311*bdd1243dSDimitry Andric 312e8d8bef9SDimitry Andric public: 313e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelperState( 314349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper, 315*bdd1243dSDimitry Andric AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper, 316*bdd1243dSDimitry Andric const GCNSubtarget &Subtarget) 317*bdd1243dSDimitry Andric : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper), 318*bdd1243dSDimitry Andric Subtarget(&Subtarget) {} 319e8d8bef9SDimitry Andric }; 320e8d8bef9SDimitry Andric 3215ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 3225ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc" 3235ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 3245ffd83dbSDimitry Andric 3255ffd83dbSDimitry Andric namespace { 3265ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 3275ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc" 3285ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 3295ffd83dbSDimitry Andric 330e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo { 3315ffd83dbSDimitry Andric GISelKnownBits *KB; 3325ffd83dbSDimitry Andric MachineDominatorTree *MDT; 333*bdd1243dSDimitry Andric const GCNSubtarget &Subtarget; 3345ffd83dbSDimitry Andric 3355ffd83dbSDimitry Andric public: 3365ffd83dbSDimitry Andric AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; 3375ffd83dbSDimitry Andric 338*bdd1243dSDimitry Andric AMDGPUPostLegalizerCombinerInfo(const GCNSubtarget &Subtarget, bool EnableOpt, 339*bdd1243dSDimitry Andric bool OptSize, bool MinSize, 3405ffd83dbSDimitry Andric const AMDGPULegalizerInfo *LI, 3415ffd83dbSDimitry Andric GISelKnownBits *KB, MachineDominatorTree *MDT) 3425ffd83dbSDimitry Andric : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, 3435ffd83dbSDimitry Andric /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), 344*bdd1243dSDimitry Andric KB(KB), MDT(MDT), Subtarget(Subtarget) { 3455ffd83dbSDimitry Andric if (!GeneratedRuleCfg.parseCommandLineOption()) 3465ffd83dbSDimitry Andric report_fatal_error("Invalid rule identifier"); 3475ffd83dbSDimitry Andric } 3485ffd83dbSDimitry Andric 3495ffd83dbSDimitry Andric bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 3505ffd83dbSDimitry Andric MachineIRBuilder &B) const override; 3515ffd83dbSDimitry Andric }; 3525ffd83dbSDimitry Andric 3535ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 3545ffd83dbSDimitry Andric MachineInstr &MI, 3555ffd83dbSDimitry Andric MachineIRBuilder &B) const { 356*bdd1243dSDimitry Andric AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT, 357*bdd1243dSDimitry Andric LInfo); 358e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper); 359*bdd1243dSDimitry Andric AMDGPUGenPostLegalizerCombinerHelper Generated( 360*bdd1243dSDimitry Andric GeneratedRuleCfg, Helper, PostLegalizerHelper, Subtarget); 3615ffd83dbSDimitry Andric 362e8d8bef9SDimitry Andric if (Generated.tryCombineAll(Observer, MI, B)) 3635ffd83dbSDimitry Andric return true; 3645ffd83dbSDimitry Andric 3655ffd83dbSDimitry Andric switch (MI.getOpcode()) { 3665ffd83dbSDimitry Andric case TargetOpcode::G_SHL: 3675ffd83dbSDimitry Andric case TargetOpcode::G_LSHR: 3685ffd83dbSDimitry Andric case TargetOpcode::G_ASHR: 3695ffd83dbSDimitry Andric // On some subtargets, 64-bit shift is a quarter rate instruction. In the 3705ffd83dbSDimitry Andric // common case, splitting this into a move and a 32-bit shift is faster and 3715ffd83dbSDimitry Andric // the same code size. 3725ffd83dbSDimitry Andric return Helper.tryCombineShiftToUnmerge(MI, 32); 3735ffd83dbSDimitry Andric } 3745ffd83dbSDimitry Andric 3755ffd83dbSDimitry Andric return false; 3765ffd83dbSDimitry Andric } 3775ffd83dbSDimitry Andric 3785ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 3795ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc" 3805ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 3815ffd83dbSDimitry Andric 3825ffd83dbSDimitry Andric // Pass boilerplate 3835ffd83dbSDimitry Andric // ================ 3845ffd83dbSDimitry Andric 3855ffd83dbSDimitry Andric class AMDGPUPostLegalizerCombiner : public MachineFunctionPass { 3865ffd83dbSDimitry Andric public: 3875ffd83dbSDimitry Andric static char ID; 3885ffd83dbSDimitry Andric 3895ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner(bool IsOptNone = false); 3905ffd83dbSDimitry Andric 3915ffd83dbSDimitry Andric StringRef getPassName() const override { 3925ffd83dbSDimitry Andric return "AMDGPUPostLegalizerCombiner"; 3935ffd83dbSDimitry Andric } 3945ffd83dbSDimitry Andric 3955ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 3965ffd83dbSDimitry Andric 3975ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 3985ffd83dbSDimitry Andric private: 3995ffd83dbSDimitry Andric bool IsOptNone; 4005ffd83dbSDimitry Andric }; 4015ffd83dbSDimitry Andric } // end anonymous namespace 4025ffd83dbSDimitry Andric 4035ffd83dbSDimitry Andric void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 4045ffd83dbSDimitry Andric AU.addRequired<TargetPassConfig>(); 4055ffd83dbSDimitry Andric AU.setPreservesCFG(); 4065ffd83dbSDimitry Andric getSelectionDAGFallbackAnalysisUsage(AU); 4075ffd83dbSDimitry Andric AU.addRequired<GISelKnownBitsAnalysis>(); 4085ffd83dbSDimitry Andric AU.addPreserved<GISelKnownBitsAnalysis>(); 4095ffd83dbSDimitry Andric if (!IsOptNone) { 4105ffd83dbSDimitry Andric AU.addRequired<MachineDominatorTree>(); 4115ffd83dbSDimitry Andric AU.addPreserved<MachineDominatorTree>(); 4125ffd83dbSDimitry Andric } 4135ffd83dbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 4145ffd83dbSDimitry Andric } 4155ffd83dbSDimitry Andric 4165ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone) 4175ffd83dbSDimitry Andric : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 4185ffd83dbSDimitry Andric initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 4195ffd83dbSDimitry Andric } 4205ffd83dbSDimitry Andric 4215ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 4225ffd83dbSDimitry Andric if (MF.getProperties().hasProperty( 4235ffd83dbSDimitry Andric MachineFunctionProperties::Property::FailedISel)) 4245ffd83dbSDimitry Andric return false; 4255ffd83dbSDimitry Andric auto *TPC = &getAnalysis<TargetPassConfig>(); 4265ffd83dbSDimitry Andric const Function &F = MF.getFunction(); 4275ffd83dbSDimitry Andric bool EnableOpt = 4285ffd83dbSDimitry Andric MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 4295ffd83dbSDimitry Andric 4305ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 4315ffd83dbSDimitry Andric const AMDGPULegalizerInfo *LI 4325ffd83dbSDimitry Andric = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo()); 4335ffd83dbSDimitry Andric 4345ffd83dbSDimitry Andric GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 4355ffd83dbSDimitry Andric MachineDominatorTree *MDT = 4365ffd83dbSDimitry Andric IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 437*bdd1243dSDimitry Andric AMDGPUPostLegalizerCombinerInfo PCInfo(ST, EnableOpt, F.hasOptSize(), 4385ffd83dbSDimitry Andric F.hasMinSize(), LI, KB, MDT); 4395ffd83dbSDimitry Andric Combiner C(PCInfo, TPC); 4405ffd83dbSDimitry Andric return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 4415ffd83dbSDimitry Andric } 4425ffd83dbSDimitry Andric 4435ffd83dbSDimitry Andric char AMDGPUPostLegalizerCombiner::ID = 0; 4445ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, 4455ffd83dbSDimitry Andric "Combine AMDGPU machine instrs after legalization", 4465ffd83dbSDimitry Andric false, false) 4475ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 4485ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 4495ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, 4505ffd83dbSDimitry Andric "Combine AMDGPU machine instrs after legalization", false, 4515ffd83dbSDimitry Andric false) 4525ffd83dbSDimitry Andric 4535ffd83dbSDimitry Andric namespace llvm { 4545ffd83dbSDimitry Andric FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) { 4555ffd83dbSDimitry Andric return new AMDGPUPostLegalizerCombiner(IsOptNone); 4565ffd83dbSDimitry Andric } 4575ffd83dbSDimitry Andric } // end namespace llvm 458