15ffd83dbSDimitry Andric //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric // This pass does combining of machine instructions at the generic MI level, 105ffd83dbSDimitry Andric // after the legalizer. 115ffd83dbSDimitry Andric // 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 14*e8d8bef9SDimitry Andric #include "AMDGPU.h" 155ffd83dbSDimitry Andric #include "AMDGPULegalizerInfo.h" 16*e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 17*e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 185ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h" 195ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 215ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 235ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 245ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 25*e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 265ffd83dbSDimitry Andric 275ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-postlegalizer-combiner" 285ffd83dbSDimitry Andric 295ffd83dbSDimitry Andric using namespace llvm; 305ffd83dbSDimitry Andric using namespace MIPatternMatch; 315ffd83dbSDimitry Andric 32*e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelper { 33*e8d8bef9SDimitry Andric protected: 34*e8d8bef9SDimitry Andric MachineIRBuilder &B; 35*e8d8bef9SDimitry Andric MachineFunction &MF; 36*e8d8bef9SDimitry Andric MachineRegisterInfo &MRI; 37*e8d8bef9SDimitry Andric CombinerHelper &Helper; 38*e8d8bef9SDimitry Andric 39*e8d8bef9SDimitry Andric public: 40*e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper) 41*e8d8bef9SDimitry Andric : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){}; 42*e8d8bef9SDimitry Andric 435ffd83dbSDimitry Andric struct FMinFMaxLegacyInfo { 445ffd83dbSDimitry Andric Register LHS; 455ffd83dbSDimitry Andric Register RHS; 465ffd83dbSDimitry Andric Register True; 475ffd83dbSDimitry Andric Register False; 485ffd83dbSDimitry Andric CmpInst::Predicate Pred; 495ffd83dbSDimitry Andric }; 505ffd83dbSDimitry Andric 515ffd83dbSDimitry Andric // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize 52*e8d8bef9SDimitry Andric bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info); 53*e8d8bef9SDimitry Andric void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, 54*e8d8bef9SDimitry Andric const FMinFMaxLegacyInfo &Info); 55*e8d8bef9SDimitry Andric 56*e8d8bef9SDimitry Andric bool matchUCharToFloat(MachineInstr &MI); 57*e8d8bef9SDimitry Andric void applyUCharToFloat(MachineInstr &MI); 58*e8d8bef9SDimitry Andric 59*e8d8bef9SDimitry Andric // FIXME: Should be able to have 2 separate matchdatas rather than custom 60*e8d8bef9SDimitry Andric // struct boilerplate. 61*e8d8bef9SDimitry Andric struct CvtF32UByteMatchInfo { 62*e8d8bef9SDimitry Andric Register CvtVal; 63*e8d8bef9SDimitry Andric unsigned ShiftOffset; 64*e8d8bef9SDimitry Andric }; 65*e8d8bef9SDimitry Andric 66*e8d8bef9SDimitry Andric bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo); 67*e8d8bef9SDimitry Andric void applyCvtF32UByteN(MachineInstr &MI, 68*e8d8bef9SDimitry Andric const CvtF32UByteMatchInfo &MatchInfo); 69*e8d8bef9SDimitry Andric }; 70*e8d8bef9SDimitry Andric 71*e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy( 72*e8d8bef9SDimitry Andric MachineInstr &MI, FMinFMaxLegacyInfo &Info) { 735ffd83dbSDimitry Andric // FIXME: Combines should have subtarget predicates, and we shouldn't need 745ffd83dbSDimitry Andric // this here. 755ffd83dbSDimitry Andric if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy()) 765ffd83dbSDimitry Andric return false; 775ffd83dbSDimitry Andric 785ffd83dbSDimitry Andric // FIXME: Type predicate on pattern 795ffd83dbSDimitry Andric if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32)) 805ffd83dbSDimitry Andric return false; 815ffd83dbSDimitry Andric 825ffd83dbSDimitry Andric Register Cond = MI.getOperand(1).getReg(); 835ffd83dbSDimitry Andric if (!MRI.hasOneNonDBGUse(Cond) || 845ffd83dbSDimitry Andric !mi_match(Cond, MRI, 855ffd83dbSDimitry Andric m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS)))) 865ffd83dbSDimitry Andric return false; 875ffd83dbSDimitry Andric 885ffd83dbSDimitry Andric Info.True = MI.getOperand(2).getReg(); 895ffd83dbSDimitry Andric Info.False = MI.getOperand(3).getReg(); 905ffd83dbSDimitry Andric 915ffd83dbSDimitry Andric if (!(Info.LHS == Info.True && Info.RHS == Info.False) && 925ffd83dbSDimitry Andric !(Info.LHS == Info.False && Info.RHS == Info.True)) 935ffd83dbSDimitry Andric return false; 945ffd83dbSDimitry Andric 955ffd83dbSDimitry Andric switch (Info.Pred) { 965ffd83dbSDimitry Andric case CmpInst::FCMP_FALSE: 975ffd83dbSDimitry Andric case CmpInst::FCMP_OEQ: 985ffd83dbSDimitry Andric case CmpInst::FCMP_ONE: 995ffd83dbSDimitry Andric case CmpInst::FCMP_ORD: 1005ffd83dbSDimitry Andric case CmpInst::FCMP_UNO: 1015ffd83dbSDimitry Andric case CmpInst::FCMP_UEQ: 1025ffd83dbSDimitry Andric case CmpInst::FCMP_UNE: 1035ffd83dbSDimitry Andric case CmpInst::FCMP_TRUE: 1045ffd83dbSDimitry Andric return false; 1055ffd83dbSDimitry Andric default: 1065ffd83dbSDimitry Andric return true; 1075ffd83dbSDimitry Andric } 1085ffd83dbSDimitry Andric } 1095ffd83dbSDimitry Andric 110*e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy( 111*e8d8bef9SDimitry Andric MachineInstr &MI, const FMinFMaxLegacyInfo &Info) { 112*e8d8bef9SDimitry Andric B.setInstrAndDebugLoc(MI); 113*e8d8bef9SDimitry Andric auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) { 114*e8d8bef9SDimitry Andric B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags()); 1155ffd83dbSDimitry Andric }; 1165ffd83dbSDimitry Andric 1175ffd83dbSDimitry Andric switch (Info.Pred) { 1185ffd83dbSDimitry Andric case CmpInst::FCMP_ULT: 1195ffd83dbSDimitry Andric case CmpInst::FCMP_ULE: 1205ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1215ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); 1225ffd83dbSDimitry Andric else 1235ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); 1245ffd83dbSDimitry Andric break; 1255ffd83dbSDimitry Andric case CmpInst::FCMP_OLE: 1265ffd83dbSDimitry Andric case CmpInst::FCMP_OLT: { 1275ffd83dbSDimitry Andric // We need to permute the operands to get the correct NaN behavior. The 1285ffd83dbSDimitry Andric // selected operand is the second one based on the failing compare with NaN, 1295ffd83dbSDimitry Andric // so permute it based on the compare type the hardware uses. 1305ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1315ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); 1325ffd83dbSDimitry Andric else 1335ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); 1345ffd83dbSDimitry Andric break; 1355ffd83dbSDimitry Andric } 1365ffd83dbSDimitry Andric case CmpInst::FCMP_UGE: 1375ffd83dbSDimitry Andric case CmpInst::FCMP_UGT: { 1385ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1395ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); 1405ffd83dbSDimitry Andric else 1415ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); 1425ffd83dbSDimitry Andric break; 1435ffd83dbSDimitry Andric } 1445ffd83dbSDimitry Andric case CmpInst::FCMP_OGT: 1455ffd83dbSDimitry Andric case CmpInst::FCMP_OGE: { 1465ffd83dbSDimitry Andric if (Info.LHS == Info.True) 1475ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); 1485ffd83dbSDimitry Andric else 1495ffd83dbSDimitry Andric buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); 1505ffd83dbSDimitry Andric break; 1515ffd83dbSDimitry Andric } 1525ffd83dbSDimitry Andric default: 1535ffd83dbSDimitry Andric llvm_unreachable("predicate should not have matched"); 1545ffd83dbSDimitry Andric } 1555ffd83dbSDimitry Andric 1565ffd83dbSDimitry Andric MI.eraseFromParent(); 1575ffd83dbSDimitry Andric } 1585ffd83dbSDimitry Andric 159*e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) { 1605ffd83dbSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1615ffd83dbSDimitry Andric 1625ffd83dbSDimitry Andric // TODO: We could try to match extracting the higher bytes, which would be 1635ffd83dbSDimitry Andric // easier if i8 vectors weren't promoted to i32 vectors, particularly after 1645ffd83dbSDimitry Andric // types are legalized. v4i8 -> v4f32 is probably the only case to worry 1655ffd83dbSDimitry Andric // about in practice. 1665ffd83dbSDimitry Andric LLT Ty = MRI.getType(DstReg); 1675ffd83dbSDimitry Andric if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) { 1685ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1695ffd83dbSDimitry Andric unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits(); 1705ffd83dbSDimitry Andric assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64); 1715ffd83dbSDimitry Andric const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8); 1725ffd83dbSDimitry Andric return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask); 1735ffd83dbSDimitry Andric } 1745ffd83dbSDimitry Andric 1755ffd83dbSDimitry Andric return false; 1765ffd83dbSDimitry Andric } 1775ffd83dbSDimitry Andric 178*e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) { 179*e8d8bef9SDimitry Andric B.setInstrAndDebugLoc(MI); 1805ffd83dbSDimitry Andric 1815ffd83dbSDimitry Andric const LLT S32 = LLT::scalar(32); 1825ffd83dbSDimitry Andric 1835ffd83dbSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1845ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 185*e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 186*e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 1875ffd83dbSDimitry Andric if (SrcTy != S32) 1885ffd83dbSDimitry Andric SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0); 1895ffd83dbSDimitry Andric 1905ffd83dbSDimitry Andric if (Ty == S32) { 1915ffd83dbSDimitry Andric B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, 1925ffd83dbSDimitry Andric {SrcReg}, MI.getFlags()); 1935ffd83dbSDimitry Andric } else { 1945ffd83dbSDimitry Andric auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, 1955ffd83dbSDimitry Andric {SrcReg}, MI.getFlags()); 1965ffd83dbSDimitry Andric B.buildFPTrunc(DstReg, Cvt0, MI.getFlags()); 1975ffd83dbSDimitry Andric } 1985ffd83dbSDimitry Andric 1995ffd83dbSDimitry Andric MI.eraseFromParent(); 2005ffd83dbSDimitry Andric } 2015ffd83dbSDimitry Andric 202*e8d8bef9SDimitry Andric bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN( 203*e8d8bef9SDimitry Andric MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) { 2045ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2055ffd83dbSDimitry Andric 2065ffd83dbSDimitry Andric // Look through G_ZEXT. 2075ffd83dbSDimitry Andric mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg))); 2085ffd83dbSDimitry Andric 2095ffd83dbSDimitry Andric Register Src0; 2105ffd83dbSDimitry Andric int64_t ShiftAmt; 2115ffd83dbSDimitry Andric bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt))); 2125ffd83dbSDimitry Andric if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) { 2135ffd83dbSDimitry Andric const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0; 2145ffd83dbSDimitry Andric 2155ffd83dbSDimitry Andric unsigned ShiftOffset = 8 * Offset; 2165ffd83dbSDimitry Andric if (IsShr) 2175ffd83dbSDimitry Andric ShiftOffset += ShiftAmt; 2185ffd83dbSDimitry Andric else 2195ffd83dbSDimitry Andric ShiftOffset -= ShiftAmt; 2205ffd83dbSDimitry Andric 2215ffd83dbSDimitry Andric MatchInfo.CvtVal = Src0; 2225ffd83dbSDimitry Andric MatchInfo.ShiftOffset = ShiftOffset; 2235ffd83dbSDimitry Andric return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0; 2245ffd83dbSDimitry Andric } 2255ffd83dbSDimitry Andric 2265ffd83dbSDimitry Andric // TODO: Simplify demanded bits. 2275ffd83dbSDimitry Andric return false; 2285ffd83dbSDimitry Andric } 2295ffd83dbSDimitry Andric 230*e8d8bef9SDimitry Andric void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN( 231*e8d8bef9SDimitry Andric MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) { 232*e8d8bef9SDimitry Andric B.setInstrAndDebugLoc(MI); 2335ffd83dbSDimitry Andric unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8; 2345ffd83dbSDimitry Andric 2355ffd83dbSDimitry Andric const LLT S32 = LLT::scalar(32); 2365ffd83dbSDimitry Andric Register CvtSrc = MatchInfo.CvtVal; 237*e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(MatchInfo.CvtVal); 2385ffd83dbSDimitry Andric if (SrcTy != S32) { 2395ffd83dbSDimitry Andric assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8); 2405ffd83dbSDimitry Andric CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0); 2415ffd83dbSDimitry Andric } 2425ffd83dbSDimitry Andric 2435ffd83dbSDimitry Andric assert(MI.getOpcode() != NewOpc); 2445ffd83dbSDimitry Andric B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags()); 2455ffd83dbSDimitry Andric MI.eraseFromParent(); 2465ffd83dbSDimitry Andric } 2475ffd83dbSDimitry Andric 248*e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerHelperState { 249*e8d8bef9SDimitry Andric protected: 250*e8d8bef9SDimitry Andric CombinerHelper &Helper; 251*e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper; 252*e8d8bef9SDimitry Andric 253*e8d8bef9SDimitry Andric public: 254*e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelperState( 255*e8d8bef9SDimitry Andric CombinerHelper &Helper, 256*e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper) 257*e8d8bef9SDimitry Andric : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {} 258*e8d8bef9SDimitry Andric }; 259*e8d8bef9SDimitry Andric 2605ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 2615ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc" 2625ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 2635ffd83dbSDimitry Andric 2645ffd83dbSDimitry Andric namespace { 2655ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 2665ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc" 2675ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 2685ffd83dbSDimitry Andric 269*e8d8bef9SDimitry Andric class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo { 2705ffd83dbSDimitry Andric GISelKnownBits *KB; 2715ffd83dbSDimitry Andric MachineDominatorTree *MDT; 2725ffd83dbSDimitry Andric 2735ffd83dbSDimitry Andric public: 2745ffd83dbSDimitry Andric AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; 2755ffd83dbSDimitry Andric 2765ffd83dbSDimitry Andric AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 2775ffd83dbSDimitry Andric const AMDGPULegalizerInfo *LI, 2785ffd83dbSDimitry Andric GISelKnownBits *KB, MachineDominatorTree *MDT) 2795ffd83dbSDimitry Andric : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, 2805ffd83dbSDimitry Andric /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), 2815ffd83dbSDimitry Andric KB(KB), MDT(MDT) { 2825ffd83dbSDimitry Andric if (!GeneratedRuleCfg.parseCommandLineOption()) 2835ffd83dbSDimitry Andric report_fatal_error("Invalid rule identifier"); 2845ffd83dbSDimitry Andric } 2855ffd83dbSDimitry Andric 2865ffd83dbSDimitry Andric bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 2875ffd83dbSDimitry Andric MachineIRBuilder &B) const override; 2885ffd83dbSDimitry Andric }; 2895ffd83dbSDimitry Andric 2905ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 2915ffd83dbSDimitry Andric MachineInstr &MI, 2925ffd83dbSDimitry Andric MachineIRBuilder &B) const { 293*e8d8bef9SDimitry Andric CombinerHelper Helper(Observer, B, KB, MDT, LInfo); 294*e8d8bef9SDimitry Andric AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper); 295*e8d8bef9SDimitry Andric AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper, 296*e8d8bef9SDimitry Andric PostLegalizerHelper); 2975ffd83dbSDimitry Andric 298*e8d8bef9SDimitry Andric if (Generated.tryCombineAll(Observer, MI, B)) 2995ffd83dbSDimitry Andric return true; 3005ffd83dbSDimitry Andric 3015ffd83dbSDimitry Andric switch (MI.getOpcode()) { 3025ffd83dbSDimitry Andric case TargetOpcode::G_SHL: 3035ffd83dbSDimitry Andric case TargetOpcode::G_LSHR: 3045ffd83dbSDimitry Andric case TargetOpcode::G_ASHR: 3055ffd83dbSDimitry Andric // On some subtargets, 64-bit shift is a quarter rate instruction. In the 3065ffd83dbSDimitry Andric // common case, splitting this into a move and a 32-bit shift is faster and 3075ffd83dbSDimitry Andric // the same code size. 3085ffd83dbSDimitry Andric return Helper.tryCombineShiftToUnmerge(MI, 32); 3095ffd83dbSDimitry Andric } 3105ffd83dbSDimitry Andric 3115ffd83dbSDimitry Andric return false; 3125ffd83dbSDimitry Andric } 3135ffd83dbSDimitry Andric 3145ffd83dbSDimitry Andric #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 3155ffd83dbSDimitry Andric #include "AMDGPUGenPostLegalizeGICombiner.inc" 3165ffd83dbSDimitry Andric #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 3175ffd83dbSDimitry Andric 3185ffd83dbSDimitry Andric // Pass boilerplate 3195ffd83dbSDimitry Andric // ================ 3205ffd83dbSDimitry Andric 3215ffd83dbSDimitry Andric class AMDGPUPostLegalizerCombiner : public MachineFunctionPass { 3225ffd83dbSDimitry Andric public: 3235ffd83dbSDimitry Andric static char ID; 3245ffd83dbSDimitry Andric 3255ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner(bool IsOptNone = false); 3265ffd83dbSDimitry Andric 3275ffd83dbSDimitry Andric StringRef getPassName() const override { 3285ffd83dbSDimitry Andric return "AMDGPUPostLegalizerCombiner"; 3295ffd83dbSDimitry Andric } 3305ffd83dbSDimitry Andric 3315ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 3325ffd83dbSDimitry Andric 3335ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 3345ffd83dbSDimitry Andric private: 3355ffd83dbSDimitry Andric bool IsOptNone; 3365ffd83dbSDimitry Andric }; 3375ffd83dbSDimitry Andric } // end anonymous namespace 3385ffd83dbSDimitry Andric 3395ffd83dbSDimitry Andric void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 3405ffd83dbSDimitry Andric AU.addRequired<TargetPassConfig>(); 3415ffd83dbSDimitry Andric AU.setPreservesCFG(); 3425ffd83dbSDimitry Andric getSelectionDAGFallbackAnalysisUsage(AU); 3435ffd83dbSDimitry Andric AU.addRequired<GISelKnownBitsAnalysis>(); 3445ffd83dbSDimitry Andric AU.addPreserved<GISelKnownBitsAnalysis>(); 3455ffd83dbSDimitry Andric if (!IsOptNone) { 3465ffd83dbSDimitry Andric AU.addRequired<MachineDominatorTree>(); 3475ffd83dbSDimitry Andric AU.addPreserved<MachineDominatorTree>(); 3485ffd83dbSDimitry Andric } 3495ffd83dbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 3505ffd83dbSDimitry Andric } 3515ffd83dbSDimitry Andric 3525ffd83dbSDimitry Andric AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone) 3535ffd83dbSDimitry Andric : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 3545ffd83dbSDimitry Andric initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 3555ffd83dbSDimitry Andric } 3565ffd83dbSDimitry Andric 3575ffd83dbSDimitry Andric bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 3585ffd83dbSDimitry Andric if (MF.getProperties().hasProperty( 3595ffd83dbSDimitry Andric MachineFunctionProperties::Property::FailedISel)) 3605ffd83dbSDimitry Andric return false; 3615ffd83dbSDimitry Andric auto *TPC = &getAnalysis<TargetPassConfig>(); 3625ffd83dbSDimitry Andric const Function &F = MF.getFunction(); 3635ffd83dbSDimitry Andric bool EnableOpt = 3645ffd83dbSDimitry Andric MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 3655ffd83dbSDimitry Andric 3665ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 3675ffd83dbSDimitry Andric const AMDGPULegalizerInfo *LI 3685ffd83dbSDimitry Andric = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo()); 3695ffd83dbSDimitry Andric 3705ffd83dbSDimitry Andric GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 3715ffd83dbSDimitry Andric MachineDominatorTree *MDT = 3725ffd83dbSDimitry Andric IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 3735ffd83dbSDimitry Andric AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 3745ffd83dbSDimitry Andric F.hasMinSize(), LI, KB, MDT); 3755ffd83dbSDimitry Andric Combiner C(PCInfo, TPC); 3765ffd83dbSDimitry Andric return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 3775ffd83dbSDimitry Andric } 3785ffd83dbSDimitry Andric 3795ffd83dbSDimitry Andric char AMDGPUPostLegalizerCombiner::ID = 0; 3805ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, 3815ffd83dbSDimitry Andric "Combine AMDGPU machine instrs after legalization", 3825ffd83dbSDimitry Andric false, false) 3835ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 3845ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 3855ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, 3865ffd83dbSDimitry Andric "Combine AMDGPU machine instrs after legalization", false, 3875ffd83dbSDimitry Andric false) 3885ffd83dbSDimitry Andric 3895ffd83dbSDimitry Andric namespace llvm { 3905ffd83dbSDimitry Andric FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) { 3915ffd83dbSDimitry Andric return new AMDGPUPostLegalizerCombiner(IsOptNone); 3925ffd83dbSDimitry Andric } 3935ffd83dbSDimitry Andric } // end namespace llvm 394