1 //=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does combining of machine instructions at the generic MI level, 10 // after register banks are known. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "AMDGPULegalizerInfo.h" 16 #include "AMDGPURegisterBankInfo.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "llvm/CodeGen/GlobalISel/Combiner.h" 20 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 21 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 22 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 23 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 24 #include "llvm/CodeGen/MachineDominators.h" 25 #include "llvm/CodeGen/TargetPassConfig.h" 26 #include "llvm/Target/TargetMachine.h" 27 #define DEBUG_TYPE "amdgpu-regbank-combiner" 28 29 using namespace llvm; 30 using namespace MIPatternMatch; 31 32 class AMDGPURegBankCombinerHelper { 33 protected: 34 MachineIRBuilder &B; 35 MachineFunction &MF; 36 MachineRegisterInfo &MRI; 37 const RegisterBankInfo &RBI; 38 const TargetRegisterInfo &TRI; 39 CombinerHelper &Helper; 40 41 public: 42 AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper) 43 : B(B), MF(B.getMF()), MRI(*B.getMRI()), 44 RBI(*MF.getSubtarget().getRegBankInfo()), 45 TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){}; 46 47 bool isVgprRegBank(Register Reg); 48 49 struct MinMaxMedOpc { 50 unsigned Min, Max, Med; 51 }; 52 53 struct Med3MatchInfo { 54 unsigned Opc; 55 Register Val0, Val1, Val2; 56 }; 57 58 MinMaxMedOpc getMinMaxPair(unsigned Opc); 59 60 template <class m_Cst> 61 bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc, 62 Register &Val, Register &K0, Register &K1); 63 64 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); 65 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); 66 }; 67 68 bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) { 69 return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID; 70 } 71 72 AMDGPURegBankCombinerHelper::MinMaxMedOpc 73 AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) { 74 switch (Opc) { 75 default: 76 llvm_unreachable("Unsupported opcode"); 77 case AMDGPU::G_SMAX: 78 case AMDGPU::G_SMIN: 79 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3}; 80 case AMDGPU::G_UMAX: 81 case AMDGPU::G_UMIN: 82 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3}; 83 } 84 } 85 86 template <class m_Cst> 87 bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI, 88 MachineRegisterInfo &MRI, 89 MinMaxMedOpc MMMOpc, Register &Val, 90 Register &K0, Register &K1) { 91 // 4 operand commutes of: min(max(Val, K0), K1). 92 // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)). 93 // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0). 94 // 4 operand commutes of: max(min(Val, K1), K0). 95 // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)). 96 // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1). 97 return mi_match( 98 MI, MRI, 99 m_any_of( 100 m_CommutativeBinOp( 101 MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)), 102 m_Cst(K1)), 103 m_CommutativeBinOp( 104 MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)), 105 m_Cst(K0)))); 106 } 107 108 bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3( 109 MachineInstr &MI, Med3MatchInfo &MatchInfo) { 110 Register Dst = MI.getOperand(0).getReg(); 111 if (!isVgprRegBank(Dst)) 112 return false; 113 114 if (MRI.getType(Dst).isVector()) 115 return false; 116 117 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode()); 118 Register Val, K0, K1; 119 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1. 120 if (!matchMed<ICstRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1)) 121 return false; 122 123 const APInt &K0_Imm = getConstantIntVRegVal(K0, MRI)->getValue(); 124 const APInt &K1_Imm = getConstantIntVRegVal(K1, MRI)->getValue(); 125 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm)) 126 return false; 127 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm)) 128 return false; 129 130 MatchInfo = {OpcodeTriple.Med, Val, K0, K1}; 131 return true; 132 } 133 134 void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI, 135 Med3MatchInfo &MatchInfo) { 136 B.setInstrAndDebugLoc(MI); 137 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)}, 138 {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags()); 139 MI.eraseFromParent(); 140 } 141 142 class AMDGPURegBankCombinerHelperState { 143 protected: 144 CombinerHelper &Helper; 145 AMDGPURegBankCombinerHelper &RegBankHelper; 146 147 public: 148 AMDGPURegBankCombinerHelperState(CombinerHelper &Helper, 149 AMDGPURegBankCombinerHelper &RegBankHelper) 150 : Helper(Helper), RegBankHelper(RegBankHelper) {} 151 }; 152 153 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS 154 #include "AMDGPUGenRegBankGICombiner.inc" 155 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS 156 157 namespace { 158 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H 159 #include "AMDGPUGenRegBankGICombiner.inc" 160 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H 161 162 class AMDGPURegBankCombinerInfo final : public CombinerInfo { 163 GISelKnownBits *KB; 164 MachineDominatorTree *MDT; 165 166 public: 167 AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg; 168 169 AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 170 const AMDGPULegalizerInfo *LI, 171 GISelKnownBits *KB, MachineDominatorTree *MDT) 172 : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, 173 /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), 174 KB(KB), MDT(MDT) { 175 if (!GeneratedRuleCfg.parseCommandLineOption()) 176 report_fatal_error("Invalid rule identifier"); 177 } 178 179 bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 180 MachineIRBuilder &B) const override; 181 }; 182 183 bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer, 184 MachineInstr &MI, 185 MachineIRBuilder &B) const { 186 CombinerHelper Helper(Observer, B, KB, MDT); 187 AMDGPURegBankCombinerHelper RegBankHelper(B, Helper); 188 AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper, 189 RegBankHelper); 190 191 if (Generated.tryCombineAll(Observer, MI, B)) 192 return true; 193 194 return false; 195 } 196 197 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP 198 #include "AMDGPUGenRegBankGICombiner.inc" 199 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP 200 201 // Pass boilerplate 202 // ================ 203 204 class AMDGPURegBankCombiner : public MachineFunctionPass { 205 public: 206 static char ID; 207 208 AMDGPURegBankCombiner(bool IsOptNone = false); 209 210 StringRef getPassName() const override { 211 return "AMDGPURegBankCombiner"; 212 } 213 214 bool runOnMachineFunction(MachineFunction &MF) override; 215 216 void getAnalysisUsage(AnalysisUsage &AU) const override; 217 private: 218 bool IsOptNone; 219 }; 220 } // end anonymous namespace 221 222 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 223 AU.addRequired<TargetPassConfig>(); 224 AU.setPreservesCFG(); 225 getSelectionDAGFallbackAnalysisUsage(AU); 226 AU.addRequired<GISelKnownBitsAnalysis>(); 227 AU.addPreserved<GISelKnownBitsAnalysis>(); 228 if (!IsOptNone) { 229 AU.addRequired<MachineDominatorTree>(); 230 AU.addPreserved<MachineDominatorTree>(); 231 } 232 MachineFunctionPass::getAnalysisUsage(AU); 233 } 234 235 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone) 236 : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 237 initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry()); 238 } 239 240 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) { 241 if (MF.getProperties().hasProperty( 242 MachineFunctionProperties::Property::FailedISel)) 243 return false; 244 auto *TPC = &getAnalysis<TargetPassConfig>(); 245 const Function &F = MF.getFunction(); 246 bool EnableOpt = 247 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 248 249 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 250 const AMDGPULegalizerInfo *LI 251 = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo()); 252 253 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 254 MachineDominatorTree *MDT = 255 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 256 AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 257 F.hasMinSize(), LI, KB, MDT); 258 Combiner C(PCInfo, TPC); 259 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 260 } 261 262 char AMDGPURegBankCombiner::ID = 0; 263 INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE, 264 "Combine AMDGPU machine instrs after regbankselect", 265 false, false) 266 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 267 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 268 INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE, 269 "Combine AMDGPU machine instrs after regbankselect", false, 270 false) 271 272 namespace llvm { 273 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) { 274 return new AMDGPURegBankCombiner(IsOptNone); 275 } 276 } // end namespace llvm 277