15ffd83dbSDimitry Andric //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric // This pass does combining of machine instructions at the generic MI level, 105ffd83dbSDimitry Andric // after the legalizer. 115ffd83dbSDimitry Andric // 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 14e8d8bef9SDimitry Andric #include "AMDGPU.h" 15349cc55cSDimitry Andric #include "AMDGPUCombinerHelper.h" 165ffd83dbSDimitry Andric #include "AMDGPULegalizerInfo.h" 17e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 18e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 195ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h" 205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 215ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 235ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 245ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 255ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 26*4824e7fdSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 27e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 285ffd83dbSDimitry Andric 295ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-postlegalizer-combiner" 305ffd83dbSDimitry Andric 315ffd83dbSDimitry Andric using namespace llvm; 325ffd83dbSDimitry Andric using namespace MIPatternMatch; 335ffd83dbSDimitry Andric 34e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelper { 35e8d8bef9SDimitry Andric protected: 36e8d8bef9SDimitry Andric MachineIRBuilder &B; 37e8d8bef9SDimitry Andric MachineFunction &MF; 38e8d8bef9SDimitry Andric MachineRegisterInfo &MRI; 39349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper; 40e8d8bef9SDimitry Andric 41e8d8bef9SDimitry Andric public: 42349cc55cSDimitry Andric AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, 43349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper) 44e8d8bef9SDimitry Andric : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){}; 45e8d8bef9SDimitry Andric 465ffd83dbSDimitry Andric struct FMinFMaxLegacyInfo { 475ffd83dbSDimitry Andric Register LHS; 485ffd83dbSDimitry Andric Register RHS; 495ffd83dbSDimitry Andric Register True; 505ffd83dbSDimitry Andric Register False; 515ffd83dbSDimitry Andric CmpInst::Predicate Pred; 525ffd83dbSDimitry Andric }; 535ffd83dbSDimitry Andric 545ffd83dbSDimitry Andric // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize 55e8d8bef9SDimitry Andric bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info); 56e8d8bef9SDimitry Andric void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, 57e8d8bef9SDimitry Andric const FMinFMaxLegacyInfo &Info); 58e8d8bef9SDimitry Andric 59e8d8bef9SDimitry Andric bool matchUCharToFloat(MachineInstr &MI); 60e8d8bef9SDimitry Andric void applyUCharToFloat(MachineInstr &MI); 61e8d8bef9SDimitry Andric 62*4824e7fdSDimitry Andric bool matchRcpSqrtToRsq(MachineInstr &MI, 63*4824e7fdSDimitry Andric std::function<void(MachineIRBuilder &)> &MatchInfo); 64*4824e7fdSDimitry Andric 65e8d8bef9SDimitry Andric // FIXME: Should be able to have 2 separate matchdatas rather than custom 66e8d8bef9SDimitry Andric // struct boilerplate. 67e8d8bef9SDimitry Andric struct CvtF32UByteMatchInfo { 68e8d8bef9SDimitry Andric Register CvtVal; 69e8d8bef9SDimitry Andric unsigned ShiftOffset; 70e8d8bef9SDimitry Andric }; 71e8d8bef9SDimitry Andric 72e8d8bef9SDimitry Andric bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo); 73e8d8bef9SDimitry Andric void applyCvtF32UByteN(MachineInstr &MI, 74e8d8bef9SDimitry Andric const CvtF32UByteMatchInfo &MatchInfo); 75fe6060f1SDimitry Andric 76fe6060f1SDimitry Andric bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg); 77e8d8bef9SDimitry Andric }; 78e8d8bef9SDimitry Andric 79e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy( 80e8d8bef9SDimitry Andric MachineInstr &MI, FMinFMaxLegacyInfo &Info) { 815ffd83dbSDimitry Andric // FIXME: Combines should have subtarget predicates, and we shouldn't need 825ffd83dbSDimitry Andric // this here. 835ffd83dbSDimitry Andric if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy()) 845ffd83dbSDimitry Andric return false; 855ffd83dbSDimitry Andric 865ffd83dbSDimitry Andric // FIXME: Type predicate on pattern 875ffd83dbSDimitry Andric if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32)) 885ffd83dbSDimitry Andric return false; 895ffd83dbSDimitry Andric 905ffd83dbSDimitry Andric Register Cond = MI.getOperand(1).getReg(); 915ffd83dbSDimitry Andric if (!MRI.hasOneNonDBGUse(Cond) || 925ffd83dbSDimitry Andric !mi_match(Cond, MRI, 935ffd83dbSDimitry Andric m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS)))) 945ffd83dbSDimitry Andric return false; 955ffd83dbSDimitry Andric 965ffd83dbSDimitry Andric Info.True = MI.getOperand(2).getReg(); 975ffd83dbSDimitry Andric Info.False = MI.getOperand(3).getReg(); 985ffd83dbSDimitry Andric 995ffd83dbSDimitry Andric if (!(Info.LHS == Info.True && Info.RHS == Info.False) && 1005ffd83dbSDimitry Andric !(Info.LHS == Info.False && Info.RHS == Info.True)) 1015ffd83dbSDimitry Andric return false; 1025ffd83dbSDimitry Andric 1035ffd83dbSDimitry Andric switch (Info.Pred) { 1045ffd83dbSDimitry Andric case CmpInst::FCMP_FALSE: 1055ffd83dbSDimitry Andric case CmpInst::FCMP_OEQ: 1065ffd83dbSDimitry Andric case CmpInst::FCMP_ONE: 1075ffd83dbSDimitry Andric case CmpInst::FCMP_ORD: 1085ffd83dbSDimitry Andric case CmpInst::FCMP_UNO: 1095ffd83dbSDimitry Andric case CmpInst::FCMP_UEQ: 1105ffd83dbSDimitry Andric case CmpInst::FCMP_UNE: 1115ffd83dbSDimitry Andric case CmpInst::FCMP_TRUE: 1125ffd83dbSDimitry Andric return false; 1135ffd83dbSDimitry Andric default: 1145ffd83dbSDimitry Andric return true; 1155ffd83dbSDimitry Andric } 1165ffd83dbSDimitry Andric } 1175ffd83dbSDimitry Andric 118e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy( 119e8d8bef9SDimitry Andric MachineInstr &MI, const FMinFMaxLegacyInfo &Info) { 120e8d8bef9SDimitry Andric B.setInstrAndDebugLoc(MI); 121e8d8bef9SDimitry Andric auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) { 122e8d8bef9SDimitry Andric B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags()); 1235ffd83dbSDimitry Andric }; 1245ffd83dbSDimitry Andric 1255ffd83dbSDimitry Andric switch (Info.Pred) { 1265ffd83dbSDimitry Andric case CmpInst::FCMP_ULT: 1275ffd83dbSDimitry Andric case CmpInst::FCMP_ULE: 1285ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1295ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); 1305ffd83dbSDimitry Andric else 1315ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); 1325ffd83dbSDimitry Andric break; 1335ffd83dbSDimitry Andric case CmpInst::FCMP_OLE: 1345ffd83dbSDimitry Andric case CmpInst::FCMP_OLT: { 1355ffd83dbSDimitry Andric // We need to permute the operands to get the correct NaN behavior. The 1365ffd83dbSDimitry Andric // selected operand is the second one based on the failing compare with NaN, 1375ffd83dbSDimitry Andric // so permute it based on the compare type the hardware uses. 1385ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1395ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); 1405ffd83dbSDimitry Andric else 1415ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); 1425ffd83dbSDimitry Andric break; 1435ffd83dbSDimitry Andric } 1445ffd83dbSDimitry Andric case CmpInst::FCMP_UGE: 1455ffd83dbSDimitry Andric case CmpInst::FCMP_UGT: { 1465ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1475ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); 1485ffd83dbSDimitry Andric else 1495ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); 1505ffd83dbSDimitry Andric break; 1515ffd83dbSDimitry Andric } 1525ffd83dbSDimitry Andric case CmpInst::FCMP_OGT: 1535ffd83dbSDimitry Andric case CmpInst::FCMP_OGE: { 1545ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1555ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); 1565ffd83dbSDimitry Andric else 1575ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); 1585ffd83dbSDimitry Andric break; 1595ffd83dbSDimitry Andric } 1605ffd83dbSDimitry Andric default: 1615ffd83dbSDimitry Andric llvm_unreachable("predicate should not have matched"); 1625ffd83dbSDimitry Andric } 1635ffd83dbSDimitry Andric 1645ffd83dbSDimitry Andric MI.eraseFromParent(); 1655ffd83dbSDimitry Andric } 1665ffd83dbSDimitry Andric 167e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) { 1685ffd83dbSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1695ffd83dbSDimitry Andric 1705ffd83dbSDimitry Andric // TODO: We could try to match extracting the higher bytes, which would be 1715ffd83dbSDimitry Andric // easier if i8 vectors weren't promoted to i32 vectors, particularly after 1725ffd83dbSDimitry Andric // types are legalized. v4i8 -> v4f32 is probably the only case to worry 1735ffd83dbSDimitry Andric // about in practice. 1745ffd83dbSDimitry Andric LLT Ty = MRI.getType(DstReg); 1755ffd83dbSDimitry Andric if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) { 1765ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1775ffd83dbSDimitry Andric unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits(); 1785ffd83dbSDimitry Andric assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64); 1795ffd83dbSDimitry Andric const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8); 1805ffd83dbSDimitry Andric return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask); 1815ffd83dbSDimitry Andric } 1825ffd83dbSDimitry Andric 1835ffd83dbSDimitry Andric return false; 1845ffd83dbSDimitry Andric } 1855ffd83dbSDimitry Andric 186e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) { 187e8d8bef9SDimitry Andric B.setInstrAndDebugLoc(MI); 1885ffd83dbSDimitry Andric 1895ffd83dbSDimitry Andric const LLT S32 = LLT::scalar(32); 1905ffd83dbSDimitry Andric 1915ffd83dbSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1925ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 193e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 194e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 1955ffd83dbSDimitry Andric if (SrcTy != S32) 1965ffd83dbSDimitry Andric SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0); 1975ffd83dbSDimitry Andric 1985ffd83dbSDimitry Andric if (Ty == S32) { 1995ffd83dbSDimitry Andric B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, 2005ffd83dbSDimitry Andric {SrcReg}, MI.getFlags()); 2015ffd83dbSDimitry Andric } else { 2025ffd83dbSDimitry Andric auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, 2035ffd83dbSDimitry Andric {SrcReg}, MI.getFlags()); 2045ffd83dbSDimitry Andric B.buildFPTrunc(DstReg, Cvt0, MI.getFlags()); 2055ffd83dbSDimitry Andric } 2065ffd83dbSDimitry Andric 2075ffd83dbSDimitry Andric MI.eraseFromParent(); 2085ffd83dbSDimitry Andric } 2095ffd83dbSDimitry Andric 210*4824e7fdSDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq( 211*4824e7fdSDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 212*4824e7fdSDimitry Andric 213*4824e7fdSDimitry Andric auto getRcpSrc = [=](const MachineInstr &MI) { 214*4824e7fdSDimitry Andric MachineInstr *ResMI = nullptr; 215*4824e7fdSDimitry Andric if (MI.getOpcode() == TargetOpcode::G_INTRINSIC && 216*4824e7fdSDimitry Andric MI.getIntrinsicID() == Intrinsic::amdgcn_rcp) 217*4824e7fdSDimitry Andric ResMI = MRI.getVRegDef(MI.getOperand(2).getReg()); 218*4824e7fdSDimitry Andric 219*4824e7fdSDimitry Andric return ResMI; 220*4824e7fdSDimitry Andric }; 221*4824e7fdSDimitry Andric 222*4824e7fdSDimitry Andric auto getSqrtSrc = [=](const MachineInstr &MI) { 223*4824e7fdSDimitry Andric MachineInstr *SqrtSrcMI = nullptr; 224*4824e7fdSDimitry Andric mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI))); 225*4824e7fdSDimitry Andric return SqrtSrcMI; 226*4824e7fdSDimitry Andric }; 227*4824e7fdSDimitry Andric 228*4824e7fdSDimitry Andric MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr; 229*4824e7fdSDimitry Andric // rcp(sqrt(x)) 230*4824e7fdSDimitry Andric if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) { 231*4824e7fdSDimitry Andric MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) { 232*4824e7fdSDimitry Andric B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false) 233*4824e7fdSDimitry Andric .addUse(SqrtSrcMI->getOperand(0).getReg()) 234*4824e7fdSDimitry Andric .setMIFlags(MI.getFlags()); 235*4824e7fdSDimitry Andric }; 236*4824e7fdSDimitry Andric return true; 237*4824e7fdSDimitry Andric } 238*4824e7fdSDimitry Andric 239*4824e7fdSDimitry Andric // sqrt(rcp(x)) 240*4824e7fdSDimitry Andric if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) { 241*4824e7fdSDimitry Andric MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) { 242*4824e7fdSDimitry Andric B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false) 243*4824e7fdSDimitry Andric .addUse(RcpSrcMI->getOperand(0).getReg()) 244*4824e7fdSDimitry Andric .setMIFlags(MI.getFlags()); 245*4824e7fdSDimitry Andric }; 246*4824e7fdSDimitry Andric return true; 247*4824e7fdSDimitry Andric } 248*4824e7fdSDimitry Andric 249*4824e7fdSDimitry Andric return false; 250*4824e7fdSDimitry Andric } 251*4824e7fdSDimitry Andric 252e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN( 253e8d8bef9SDimitry Andric MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) { 2545ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2555ffd83dbSDimitry Andric 2565ffd83dbSDimitry Andric // Look through G_ZEXT. 2575ffd83dbSDimitry Andric mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg))); 2585ffd83dbSDimitry Andric 2595ffd83dbSDimitry Andric Register Src0; 2605ffd83dbSDimitry Andric int64_t ShiftAmt; 2615ffd83dbSDimitry Andric bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt))); 2625ffd83dbSDimitry Andric if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) { 2635ffd83dbSDimitry Andric const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0; 2645ffd83dbSDimitry Andric 2655ffd83dbSDimitry Andric unsigned ShiftOffset = 8 * Offset; 2665ffd83dbSDimitry Andric if (IsShr) 2675ffd83dbSDimitry Andric ShiftOffset += ShiftAmt; 2685ffd83dbSDimitry Andric else 2695ffd83dbSDimitry Andric ShiftOffset -= ShiftAmt; 2705ffd83dbSDimitry Andric 2715ffd83dbSDimitry Andric MatchInfo.CvtVal = Src0; 2725ffd83dbSDimitry Andric MatchInfo.ShiftOffset = ShiftOffset; 2735ffd83dbSDimitry Andric return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0; 2745ffd83dbSDimitry Andric } 2755ffd83dbSDimitry Andric 2765ffd83dbSDimitry Andric // TODO: Simplify demanded bits. 2775ffd83dbSDimitry Andric return false; 2785ffd83dbSDimitry Andric } 2795ffd83dbSDimitry Andric 280e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN( 281e8d8bef9SDimitry Andric MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) { 282e8d8bef9SDimitry Andric B.setInstrAndDebugLoc(MI); 2835ffd83dbSDimitry Andric unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8; 2845ffd83dbSDimitry Andric 2855ffd83dbSDimitry Andric const LLT S32 = LLT::scalar(32); 2865ffd83dbSDimitry Andric Register CvtSrc = MatchInfo.CvtVal; 287e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(MatchInfo.CvtVal); 2885ffd83dbSDimitry Andric if (SrcTy != S32) { 2895ffd83dbSDimitry Andric assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8); 2905ffd83dbSDimitry Andric CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0); 2915ffd83dbSDimitry Andric } 2925ffd83dbSDimitry Andric 2935ffd83dbSDimitry Andric assert(MI.getOpcode() != NewOpc); 2945ffd83dbSDimitry Andric B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags()); 2955ffd83dbSDimitry Andric MI.eraseFromParent(); 2965ffd83dbSDimitry Andric } 2975ffd83dbSDimitry Andric 298fe6060f1SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize( 299fe6060f1SDimitry Andric MachineInstr &MI, Register &Reg) { 300fe6060f1SDimitry Andric const SITargetLowering *TLI = static_cast<const SITargetLowering *>( 301fe6060f1SDimitry Andric MF.getSubtarget().getTargetLowering()); 302fe6060f1SDimitry Andric Reg = MI.getOperand(1).getReg(); 303fe6060f1SDimitry Andric return TLI->isCanonicalized(Reg, MF); 304fe6060f1SDimitry Andric } 305fe6060f1SDimitry Andric 306e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelperState { 307e8d8bef9SDimitry Andric protected: 308349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper; 309e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper; 310e8d8bef9SDimitry Andric 311e8d8bef9SDimitry Andric public: 312e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelperState( 313349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper, 314e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper) 315e8d8bef9SDimitry Andric : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {} 316e8d8bef9SDimitry Andric }; 317e8d8bef9SDimitry Andric 3185ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 3195ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc" 3205ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 3215ffd83dbSDimitry Andric 3225ffd83dbSDimitry Andric namespace { 3235ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 3245ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc" 3255ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 3265ffd83dbSDimitry Andric 327e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo { 3285ffd83dbSDimitry Andric GISelKnownBits *KB; 3295ffd83dbSDimitry Andric MachineDominatorTree *MDT; 3305ffd83dbSDimitry Andric 3315ffd83dbSDimitry Andric public: 3325ffd83dbSDimitry Andric AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; 3335ffd83dbSDimitry Andric 3345ffd83dbSDimitry Andric AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 3355ffd83dbSDimitry Andric const AMDGPULegalizerInfo *LI, 3365ffd83dbSDimitry Andric GISelKnownBits *KB, MachineDominatorTree *MDT) 3375ffd83dbSDimitry Andric : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, 3385ffd83dbSDimitry Andric /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), 3395ffd83dbSDimitry Andric KB(KB), MDT(MDT) { 3405ffd83dbSDimitry Andric if (!GeneratedRuleCfg.parseCommandLineOption()) 3415ffd83dbSDimitry Andric report_fatal_error("Invalid rule identifier"); 3425ffd83dbSDimitry Andric } 3435ffd83dbSDimitry Andric 3445ffd83dbSDimitry Andric bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 3455ffd83dbSDimitry Andric MachineIRBuilder &B) const override; 3465ffd83dbSDimitry Andric }; 3475ffd83dbSDimitry Andric 3485ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 3495ffd83dbSDimitry Andric MachineInstr &MI, 3505ffd83dbSDimitry Andric MachineIRBuilder &B) const { 351349cc55cSDimitry Andric AMDGPUCombinerHelper Helper(Observer, B, KB, MDT, LInfo); 352e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper); 353e8d8bef9SDimitry Andric AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper, 354e8d8bef9SDimitry Andric PostLegalizerHelper); 3555ffd83dbSDimitry Andric 356e8d8bef9SDimitry Andric if (Generated.tryCombineAll(Observer, MI, B)) 3575ffd83dbSDimitry Andric return true; 3585ffd83dbSDimitry Andric 3595ffd83dbSDimitry Andric switch (MI.getOpcode()) { 3605ffd83dbSDimitry Andric case TargetOpcode::G_SHL: 3615ffd83dbSDimitry Andric case TargetOpcode::G_LSHR: 3625ffd83dbSDimitry Andric case TargetOpcode::G_ASHR: 3635ffd83dbSDimitry Andric // On some subtargets, 64-bit shift is a quarter rate instruction. In the 3645ffd83dbSDimitry Andric // common case, splitting this into a move and a 32-bit shift is faster and 3655ffd83dbSDimitry Andric // the same code size. 3665ffd83dbSDimitry Andric return Helper.tryCombineShiftToUnmerge(MI, 32); 3675ffd83dbSDimitry Andric } 3685ffd83dbSDimitry Andric 3695ffd83dbSDimitry Andric return false; 3705ffd83dbSDimitry Andric } 3715ffd83dbSDimitry Andric 3725ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 3735ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc" 3745ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 3755ffd83dbSDimitry Andric 3765ffd83dbSDimitry Andric // Pass boilerplate 3775ffd83dbSDimitry Andric // ================ 3785ffd83dbSDimitry Andric 3795ffd83dbSDimitry Andric class AMDGPUPostLegalizerCombiner : public MachineFunctionPass { 3805ffd83dbSDimitry Andric public: 3815ffd83dbSDimitry Andric static char ID; 3825ffd83dbSDimitry Andric 3835ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner(bool IsOptNone = false); 3845ffd83dbSDimitry Andric 3855ffd83dbSDimitry Andric StringRef getPassName() const override { 3865ffd83dbSDimitry Andric return "AMDGPUPostLegalizerCombiner"; 3875ffd83dbSDimitry Andric } 3885ffd83dbSDimitry Andric 3895ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 3905ffd83dbSDimitry Andric 3915ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 3925ffd83dbSDimitry Andric private: 3935ffd83dbSDimitry Andric bool IsOptNone; 3945ffd83dbSDimitry Andric }; 3955ffd83dbSDimitry Andric } // end anonymous namespace 3965ffd83dbSDimitry Andric 3975ffd83dbSDimitry Andric void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 3985ffd83dbSDimitry Andric AU.addRequired<TargetPassConfig>(); 3995ffd83dbSDimitry Andric AU.setPreservesCFG(); 4005ffd83dbSDimitry Andric getSelectionDAGFallbackAnalysisUsage(AU); 4015ffd83dbSDimitry Andric AU.addRequired<GISelKnownBitsAnalysis>(); 4025ffd83dbSDimitry Andric AU.addPreserved<GISelKnownBitsAnalysis>(); 4035ffd83dbSDimitry Andric if (!IsOptNone) { 4045ffd83dbSDimitry Andric AU.addRequired<MachineDominatorTree>(); 4055ffd83dbSDimitry Andric AU.addPreserved<MachineDominatorTree>(); 4065ffd83dbSDimitry Andric } 4075ffd83dbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 4085ffd83dbSDimitry Andric } 4095ffd83dbSDimitry Andric 4105ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone) 4115ffd83dbSDimitry Andric : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 4125ffd83dbSDimitry Andric initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 4135ffd83dbSDimitry Andric } 4145ffd83dbSDimitry Andric 4155ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 4165ffd83dbSDimitry Andric if (MF.getProperties().hasProperty( 4175ffd83dbSDimitry Andric MachineFunctionProperties::Property::FailedISel)) 4185ffd83dbSDimitry Andric return false; 4195ffd83dbSDimitry Andric auto *TPC = &getAnalysis<TargetPassConfig>(); 4205ffd83dbSDimitry Andric const Function &F = MF.getFunction(); 4215ffd83dbSDimitry Andric bool EnableOpt = 4225ffd83dbSDimitry Andric MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 4235ffd83dbSDimitry Andric 4245ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 4255ffd83dbSDimitry Andric const AMDGPULegalizerInfo *LI 4265ffd83dbSDimitry Andric = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo()); 4275ffd83dbSDimitry Andric 4285ffd83dbSDimitry Andric GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 4295ffd83dbSDimitry Andric MachineDominatorTree *MDT = 4305ffd83dbSDimitry Andric IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 4315ffd83dbSDimitry Andric AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 4325ffd83dbSDimitry Andric F.hasMinSize(), LI, KB, MDT); 4335ffd83dbSDimitry Andric Combiner C(PCInfo, TPC); 4345ffd83dbSDimitry Andric return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 4355ffd83dbSDimitry Andric } 4365ffd83dbSDimitry Andric 4375ffd83dbSDimitry Andric char AMDGPUPostLegalizerCombiner::ID = 0; 4385ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, 4395ffd83dbSDimitry Andric "Combine AMDGPU machine instrs after legalization", 4405ffd83dbSDimitry Andric false, false) 4415ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 4425ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 4435ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, 4445ffd83dbSDimitry Andric "Combine AMDGPU machine instrs after legalization", false, 4455ffd83dbSDimitry Andric false) 4465ffd83dbSDimitry Andric 4475ffd83dbSDimitry Andric namespace llvm { 4485ffd83dbSDimitry Andric FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) { 4495ffd83dbSDimitry Andric return new AMDGPUPostLegalizerCombiner(IsOptNone); 4505ffd83dbSDimitry Andric } 4515ffd83dbSDimitry Andric } // end namespace llvm 452