//=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after register banks are known.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-regbank-combiner"

using namespace llvm;
using namespace MIPatternMatch;

/// Hand-written match/apply routines for the post-regbankselect combiner.
///
/// An instance is created per combine() call and handed to the
/// TableGen-generated AMDGPUGenRegBankCombinerHelper. It caches the function,
/// register info and register bank info reachable from the builder.
class AMDGPURegBankCombinerHelper {
protected:
  MachineIRBuilder &B;
  MachineFunction &MF;
  MachineRegisterInfo &MRI;
  const RegisterBankInfo &RBI;
  const TargetRegisterInfo &TRI;
  CombinerHelper &Helper;

public:
  AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
      : B(B), MF(B.getMF()), MRI(*B.getMRI()),
        RBI(*MF.getSubtarget().getRegBankInfo()),
        TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper) {}

  /// Return true if \p Reg is assigned to the VGPR register bank.
  bool isVgprRegBank(Register Reg);

  /// The min, max and med3 opcodes that belong to one signedness.
  struct MinMaxMedOpc {
    unsigned Min, Max, Med;
  };

  /// Result of a successful med3 match: the med3 opcode to build and its
  /// three source registers.
  struct Med3MatchInfo {
    unsigned Opc;
    Register Val0, Val1, Val2;
  };

  /// Map a signed/unsigned min or max opcode to its {Min, Max, Med} triple.
  MinMaxMedOpc getMinMaxPair(unsigned Opc);

  /// Match any commuted form of min(max(Val, K0), K1) or
  /// max(min(Val, K1), K0), binding \p Val, \p K0 and \p K1.
  template <class m_Cst, typename CstTy>
  bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
                Register &Val, CstTy &K0, CstTy &K1);

  /// Match an integer min/max clamp that can be rewritten as a med3.
  bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);

  /// Replace \p MI with the med3 instruction described by \p MatchInfo.
  void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
};

bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
  return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
}

AMDGPURegBankCombinerHelper::MinMaxMedOpc
AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Unsupported opcode");
  case AMDGPU::G_SMAX:
  case AMDGPU::G_SMIN:
    return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
  case AMDGPU::G_UMAX:
  case AMDGPU::G_UMIN:
    return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
  }
}

template <class m_Cst, typename CstTy>
bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MinMaxMedOpc MMMOpc, Register &Val,
                                           CstTy &K0, CstTy &K1) {
  // 4 operand commutes of: min(max(Val, K0), K1).
  // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
  // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
  // 4 operand commutes of: max(min(Val, K1), K0).
  // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
  // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
  return mi_match(
      MI, MRI,
      m_any_of(
          m_CommutativeBinOp(
              MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
              m_Cst(K1)),
          m_CommutativeBinOp(
              MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
              m_Cst(K0))));
}

bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) {
  Register Dst = MI.getOperand(0).getReg();
  if (!isVgprRegBank(Dst))
    return false;

  // Only form med3 for types the instruction is available for: 32-bit
  // scalars everywhere, and 16-bit scalars only when the subtarget reports a
  // 16-bit med3. Vectors and every other type are left untouched; rejecting
  // only vectors would let s16 through on subtargets that lack the 16-bit
  // form.
  const LLT Ty = MRI.getType(Dst);
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const bool IsSupportedTy =
      Ty == LLT::scalar(32) || (Ty == LLT::scalar(16) && ST.hasMed3_16());
  if (!IsSupportedTy)
    return false;

  MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
  Register Val;
  Optional<ValueAndVReg> K0, K1;
  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
  if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
    return false;

  // The rewrite is only done when K0 <= K1 under the matching signedness.
  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
    return false;
  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
    return false;

  MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
  return true;
}

void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
                                            Med3MatchInfo &MatchInfo) {
  // Build the med3 in front of MI, reusing MI's destination operand and
  // flags, then drop the original min/max.
  B.setInstrAndDebugLoc(MI);
  B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
               {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
  MI.eraseFromParent();
}

/// Bundles the generic CombinerHelper with the AMDGPU-specific helper.
/// NOTE(review): presumably the base class expected by the generated
/// combiner in AMDGPUGenRegBankGICombiner.inc -- confirm against the .inc.
class AMDGPURegBankCombinerHelperState {
protected:
  CombinerHelper &Helper;
  AMDGPURegBankCombinerHelper &RegBankHelper;

public:
  AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
                                   AMDGPURegBankCombinerHelper &RegBankHelper)
      : Helper(Helper), RegBankHelper(RegBankHelper) {}
};

#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenRegBankGICombiner.inc"
#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenRegBankGICombiner.inc"
#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H

/// CombinerInfo implementation that forwards every instruction to the
/// generated rule set.
class AMDGPURegBankCombinerInfo final : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;

public:
  AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;

  /// Aborts with a fatal error if the rule configuration cannot parse its
  /// command line option.
  AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                            const AMDGPULegalizerInfo *LI,
                            GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

/// Try every generated rule on \p MI; returns true if one of them applied.
bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
                                        MachineInstr &MI,
                                        MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT);
  AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
  AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
                                           RegBankHelper);

  return Generated.tryCombineAll(Observer, MI, B);
}

#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenRegBankGICombiner.inc"
#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPURegBankCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPURegBankCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPURegBankCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
  // When set, the dominator tree is neither required nor used.
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
  : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
}

bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
  // Nothing to do for functions on which instruction selection already failed.
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI
    = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  // The dominator tree is only requested in getAnalysisUsage() when
  // !IsOptNone, so it must not be queried otherwise.
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                   F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPURegBankCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after regbankselect",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after regbankselect", false,
                    false)

namespace llvm {
/// Factory for the pass; \p IsOptNone is forwarded to the pass constructor.
FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
  return new AMDGPURegBankCombiner(IsOptNone);
}
} // end namespace llvm