1*5ffd83dbSDimitry Andric //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===// 2*5ffd83dbSDimitry Andric // 3*5ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*5ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*5ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*5ffd83dbSDimitry Andric // 7*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 8*5ffd83dbSDimitry Andric // 9*5ffd83dbSDimitry Andric // This pass does combining of machine instructions at the generic MI level, 10*5ffd83dbSDimitry Andric // after the legalizer. 11*5ffd83dbSDimitry Andric // 12*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 13*5ffd83dbSDimitry Andric 14*5ffd83dbSDimitry Andric #include "AMDGPUTargetMachine.h" 15*5ffd83dbSDimitry Andric #include "AMDGPULegalizerInfo.h" 16*5ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h" 17*5ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 18*5ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 19*5ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 20*5ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 21*5ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 22*5ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 23*5ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 24*5ffd83dbSDimitry Andric #include "llvm/Support/Debug.h" 25*5ffd83dbSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 26*5ffd83dbSDimitry Andric 27*5ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-postlegalizer-combiner" 28*5ffd83dbSDimitry Andric 29*5ffd83dbSDimitry Andric using namespace llvm; 30*5ffd83dbSDimitry Andric using namespace MIPatternMatch; 
// Operands of a matched G_SELECT fed by a one-use G_FCMP, recorded so the
// apply step can rebuild the pattern as a legacy min/max instruction.
struct FMinFMaxLegacyInfo {
  Register LHS;   // fcmp left operand
  Register RHS;   // fcmp right operand
  Register True;  // select true value
  Register False; // select false value
  CmpInst::Predicate Pred;
};

// TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
//
// Match a s32 G_SELECT whose condition is a single-use G_FCMP comparing
// exactly the two selected values, with a predicate that can be expressed as
// a legacy fmin/fmax. Fills \p Info for the apply step; returns true on match.
static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
                                MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
  // FIXME: Combines should have subtarget predicates, and we shouldn't need
  // this here.
  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
    return false;

  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  // The compare must have no other users, since the combine deletes the
  // select and rebuilds from the compare operands.
  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // The selected values must be exactly the compared values (in either
  // order); otherwise this is not a min/max idiom.
  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  // Only the ordering predicates (olt/ole/ogt/oge and their unordered
  // counterparts) map onto fmin/fmax; equality and ordered-ness tests do not.
  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

// Rewrite the matched select(fcmp(...)) into G_AMDGPU_FMIN_LEGACY /
// G_AMDGPU_FMAX_LEGACY, choosing the opcode and operand order from the
// predicate so the legacy instruction's NaN behavior matches the original
// select. Erases the select afterwards.
static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                              const FMinFMaxLegacyInfo &Info) {

  // Helper to emit the replacement at MI's position, reusing MI's result
  // register and fast-math flags.
  auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) {
    MachineIRBuilder MIB(MI);
    MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with NaN,
    // so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    // matchFMinFMaxLegacy rejected every other predicate.
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

// Match a s16/s32-result conversion (uitofp per the apply step's use —
// TODO confirm the generated rule only feeds G_UITOFP here) whose source is
// known via known-bits analysis to fit in the low 8 bits, so it can become a
// cvt_f32_ubyte0.
static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineFunction &MF, CombinerHelper &Helper) {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    // All bits above the low byte must be provably zero.
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

// Replace the matched conversion with G_AMDGPU_CVT_F32_UBYTE0, widening or
// truncating the source to s32 as needed and adding an FPTrunc when the
// original result was not f32 (i.e. the s16 case).
static void applyUCharToFloat(MachineInstr &MI) {
  MachineIRBuilder B(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = B.getMRI()->getType(DstReg);
  LLT SrcTy = B.getMRI()->getType(SrcReg);
  // The cvt instruction consumes an s32 operand; upper bits are don't-care
  // since only the low byte is converted.
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
                 {SrcReg}, MI.getFlags());
  } else {
    // Non-f32 destination: convert to f32 first, then narrow.
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
                             {SrcReg}, MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

// FIXME: Should be able to have 2 separate matchdatas rather than custom struct
// boilerplate.
struct CvtF32UByteMatchInfo {
  Register CvtVal;       // value feeding the shift (the real byte source)
  unsigned ShiftOffset;  // byte-aligned bit offset selected by the shift
};

// Fold a shift feeding G_AMDGPU_CVT_F32_UBYTEn into the byte index of the
// conversion itself: cvt_ubyteN(x >> 8k) becomes cvt_ubyte(N+k)(x), and
// similarly with shl subtracting from the offset.
static bool matchCvtF32UByteN(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineFunction &MF,
                              CvtF32UByteMatchInfo &MatchInfo) {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    // UBYTE0..UBYTE3 opcodes are consecutive, so this recovers the byte index.
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    // Only byte-aligned offsets that still land inside the 32-bit value and
    // actually change the opcode (>= 8) are foldable.
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

// Emit the re-indexed G_AMDGPU_CVT_F32_UBYTEn computed by matchCvtF32UByteN
// and erase the old conversion.
static void applyCvtF32UByteN(MachineInstr &MI,
                              const CvtF32UByteMatchInfo &MatchInfo) {
  MachineIRBuilder B(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = B.getMRI()->getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  // The match guarantees ShiftOffset >= 8, so the opcode actually changes.
  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

// Pull in the TableGen-generated combiner rule declarations/definitions.
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

// CombinerInfo driving the post-legalizer combine: runs the generated rules
// first, then a small set of manual combines.
class AMDGPUPostLegalizerCombinerInfo : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT; // null at -O0 (see runOnMachineFunction)

public:
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    // Honor -<pass>-disable-rule style command-line rule filtering.
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT);
  AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);

  // Generated rules take priority over the manual fallbacks below.
  if (Generated.tryCombineAll(Observer, MI, B, Helper))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
  bool IsOptNone; // at -O0 the dominator tree is neither required nor used
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
  : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  // Don't touch functions where instruction selection already failed.
  if (MF.getProperties().hasProperty(
        MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI
    = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
// Factory used by the AMDGPU target machine to add this pass to the pipeline.
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm