1 //=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does combining of machine instructions at the generic MI level, 10 // after register banks are known. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "AMDGPULegalizerInfo.h" 16 #include "AMDGPURegisterBankInfo.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "llvm/CodeGen/GlobalISel/Combiner.h" 21 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 22 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 23 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h" 24 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" 25 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 26 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 27 #include "llvm/CodeGen/MachineDominators.h" 28 #include "llvm/CodeGen/TargetPassConfig.h" 29 #include "llvm/IR/IntrinsicsAMDGPU.h" 30 #include "llvm/Target/TargetMachine.h" 31 32 #define GET_GICOMBINER_DEPS 33 #include "AMDGPUGenPreLegalizeGICombiner.inc" 34 #undef GET_GICOMBINER_DEPS 35 36 #define DEBUG_TYPE "amdgpu-regbank-combiner" 37 38 using namespace llvm; 39 using namespace MIPatternMatch; 40 41 namespace { 42 #define GET_GICOMBINER_TYPES 43 #include "AMDGPUGenRegBankGICombiner.inc" 44 #undef GET_GICOMBINER_TYPES 45 46 class AMDGPURegBankCombinerImpl : public GIMatchTableExecutor { 47 protected: 48 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig; 49 50 MachineIRBuilder &B; 51 MachineFunction &MF; 52 MachineRegisterInfo &MRI; 53 const GCNSubtarget &STI; 54 const RegisterBankInfo &RBI; 55 const TargetRegisterInfo &TRI; 56 const SIInstrInfo &TII; 57 CombinerHelper &Helper; 58 GISelChangeObserver &Observer; 59 60 public: 61 AMDGPURegBankCombinerImpl( 62 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig, 63 MachineIRBuilder &B, CombinerHelper &Helper, 64 GISelChangeObserver &Observer); 65 66 static const char *getName() { return "AMDGPURegBankCombinerImpl"; } 67 68 bool tryCombineAll(MachineInstr &I) const; 69 70 bool isVgprRegBank(Register Reg) const; 71 Register getAsVgpr(Register Reg) const; 72 73 struct MinMaxMedOpc { 74 unsigned Min, Max, Med; 75 }; 76 77 struct Med3MatchInfo { 78 unsigned Opc; 79 Register Val0, Val1, Val2; 80 }; 81 82 MinMaxMedOpc getMinMaxPair(unsigned Opc) const; 83 84 template <class m_Cst, typename CstTy> 85 bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc, 86 Register &Val, CstTy &K0, CstTy &K1) const; 87 88 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const; 89 bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const; 90 bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const; 91 bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const; 92 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const; 93 void applyClamp(MachineInstr &MI, Register &Reg) const; 94 95 private: 96 SIModeRegisterDefaults getMode() const; 97 bool getIEEE() const; 98 bool getDX10Clamp() const; 99 bool isFminnumIeee(const MachineInstr &MI) const; 100 bool isFCst(MachineInstr *MI) const; 101 bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1) const; 102 103 #define GET_GICOMBINER_CLASS_MEMBERS 104 #define AMDGPUSubtarget GCNSubtarget 105 #include "AMDGPUGenRegBankGICombiner.inc" 106 #undef GET_GICOMBINER_CLASS_MEMBERS 107 #undef AMDGPUSubtarget 108 }; 109 110 #define GET_GICOMBINER_IMPL 111 #define AMDGPUSubtarget GCNSubtarget 112 #include "AMDGPUGenRegBankGICombiner.inc" 113 #undef AMDGPUSubtarget 114 #undef GET_GICOMBINER_IMPL 115 116 AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl( 117 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig, MachineIRBuilder &B, 118 CombinerHelper &Helper, GISelChangeObserver &Observer) 119 : RuleConfig(RuleConfig), B(B), MF(B.getMF()), MRI(*B.getMRI()), 120 STI(MF.getSubtarget<GCNSubtarget>()), RBI(*STI.getRegBankInfo()), 121 TRI(*STI.getRegisterInfo()), TII(*STI.getInstrInfo()), Helper(Helper), 122 Observer(Observer), 123 #define GET_GICOMBINER_CONSTRUCTOR_INITS 124 #include "AMDGPUGenRegBankGICombiner.inc" 125 #undef GET_GICOMBINER_CONSTRUCTOR_INITS 126 { 127 } 128 129 bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const { 130 return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID; 131 } 132 133 Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const { 134 if (isVgprRegBank(Reg)) 135 return Reg; 136 137 // Search for existing copy of Reg to vgpr. 138 for (MachineInstr &Use : MRI.use_instructions(Reg)) { 139 Register Def = Use.getOperand(0).getReg(); 140 if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def)) 141 return Def; 142 } 143 144 // Copy Reg to vgpr. 145 Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0); 146 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID)); 147 return VgprReg; 148 } 149 150 AMDGPURegBankCombinerImpl::MinMaxMedOpc 151 AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const { 152 switch (Opc) { 153 default: 154 llvm_unreachable("Unsupported opcode"); 155 case AMDGPU::G_SMAX: 156 case AMDGPU::G_SMIN: 157 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3}; 158 case AMDGPU::G_UMAX: 159 case AMDGPU::G_UMIN: 160 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3}; 161 case AMDGPU::G_FMAXNUM: 162 case AMDGPU::G_FMINNUM: 163 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3}; 164 case AMDGPU::G_FMAXNUM_IEEE: 165 case AMDGPU::G_FMINNUM_IEEE: 166 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE, 167 AMDGPU::G_AMDGPU_FMED3}; 168 } 169 } 170 171 template <class m_Cst, typename CstTy> 172 bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI, 173 MachineRegisterInfo &MRI, 174 MinMaxMedOpc MMMOpc, Register &Val, 175 CstTy &K0, CstTy &K1) const { 176 // 4 operand commutes of: min(max(Val, K0), K1). 177 // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)). 178 // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0). 179 // 4 operand commutes of: max(min(Val, K1), K0). 180 // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)). 181 // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1). 182 return mi_match( 183 MI, MRI, 184 m_any_of( 185 m_CommutativeBinOp( 186 MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)), 187 m_Cst(K1)), 188 m_CommutativeBinOp( 189 MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)), 190 m_Cst(K0)))); 191 } 192 193 bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3( 194 MachineInstr &MI, Med3MatchInfo &MatchInfo) const { 195 Register Dst = MI.getOperand(0).getReg(); 196 if (!isVgprRegBank(Dst)) 197 return false; 198 199 // med3 for i16 is only available on gfx9+, and not available for v2i16. 200 LLT Ty = MRI.getType(Dst); 201 if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32)) 202 return false; 203 204 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode()); 205 Register Val; 206 std::optional<ValueAndVReg> K0, K1; 207 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1. 208 if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1)) 209 return false; 210 211 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value)) 212 return false; 213 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value)) 214 return false; 215 216 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg}; 217 return true; 218 } 219 220 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) 221 // ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K 222 // ieee = false : min/max(NaN, K) = K 223 // clamp(NaN) = dx10_clamp ? 0.0 : NaN 224 // Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input. 225 // Other operand commutes (see matchMed) give same result since min and max are 226 // commutative. 227 228 // Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), KO<=K1 229 // with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0. 230 // Val = SNaN only for ieee = true 231 // fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1 232 // min(max(SNaN, K0), K1) = min(QNaN, K1) = K1 233 // max(min(SNaN, K1), K0) = max(K1, K0) = K1 234 // Val = NaN,ieee = false or Val = QNaN,ieee = true 235 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0 236 // min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true) 237 // max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0 238 bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3( 239 MachineInstr &MI, Med3MatchInfo &MatchInfo) const { 240 Register Dst = MI.getOperand(0).getReg(); 241 LLT Ty = MRI.getType(Dst); 242 243 // med3 for f16 is only available on gfx9+, and not available for v2f16. 244 if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32)) 245 return false; 246 247 auto OpcodeTriple = getMinMaxPair(MI.getOpcode()); 248 249 Register Val; 250 std::optional<FPValueAndVReg> K0, K1; 251 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1. 252 if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1)) 253 return false; 254 255 if (K0->Value > K1->Value) 256 return false; 257 258 // For IEEE=false perform combine only when it's safe to assume that there are 259 // no NaN inputs. Most often MI is marked with nnan fast math flag. 260 // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to 261 // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner 262 // nodes(max/min) have same behavior when one input is NaN and other isn't. 263 // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN, 264 // also post-legalizer inputs to min/max are fcanonicalized (never SNaN). 265 if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) { 266 // Don't fold single use constant that can't be inlined. 267 if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) && 268 (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) { 269 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg}; 270 return true; 271 } 272 } 273 274 return false; 275 } 276 277 bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI, 278 Register &Reg) const { 279 // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16). 280 auto OpcodeTriple = getMinMaxPair(MI.getOpcode()); 281 Register Val; 282 std::optional<FPValueAndVReg> K0, K1; 283 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). 284 if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1)) 285 return false; 286 287 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0)) 288 return false; 289 290 // For IEEE=false perform combine only when it's safe to assume that there are 291 // no NaN inputs. Most often MI is marked with nnan fast math flag. 292 // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates 293 // to 0.0 requires dx10_clamp = true. 294 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) && 295 isKnownNeverSNaN(Val, MRI)) || 296 isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) { 297 Reg = Val; 298 return true; 299 } 300 301 return false; 302 } 303 304 // Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true. 305 // Val = SNaN only for ieee = true. It is important which operand is NaN. 306 // min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0 307 // min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0 308 // min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN 309 // Val = NaN,ieee = false or Val = QNaN,ieee = true 310 // min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0 311 // min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0 312 // min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0 313 bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI, 314 Register &Reg) const { 315 // In llvm-ir, clamp is often represented as an intrinsic call to 316 // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders. 317 MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI); 318 MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI); 319 MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI); 320 321 if (isFCst(Src0) && !isFCst(Src1)) 322 std::swap(Src0, Src1); 323 if (isFCst(Src1) && !isFCst(Src2)) 324 std::swap(Src1, Src2); 325 if (isFCst(Src0) && !isFCst(Src1)) 326 std::swap(Src0, Src1); 327 if (!isClampZeroToOne(Src1, Src2)) 328 return false; 329 330 Register Val = Src0->getOperand(0).getReg(); 331 332 auto isOp3Zero = [&]() { 333 MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI); 334 if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT) 335 return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0); 336 return false; 337 }; 338 // For IEEE=false perform combine only when it's safe to assume that there are 339 // no NaN inputs. Most often MI is marked with nnan fast math flag. 340 // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold 341 // when Val could be QNaN. If Val can also be SNaN third input should be 0.0. 342 if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) || 343 (getIEEE() && getDX10Clamp() && 344 (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) { 345 Reg = Val; 346 return true; 347 } 348 349 return false; 350 } 351 352 void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI, 353 Register &Reg) const { 354 B.setInstrAndDebugLoc(MI); 355 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg}, 356 MI.getFlags()); 357 MI.eraseFromParent(); 358 } 359 360 void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI, 361 Med3MatchInfo &MatchInfo) const { 362 B.setInstrAndDebugLoc(MI); 363 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)}, 364 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1), 365 getAsVgpr(MatchInfo.Val2)}, 366 MI.getFlags()); 367 MI.eraseFromParent(); 368 } 369 370 SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const { 371 return MF.getInfo<SIMachineFunctionInfo>()->getMode(); 372 } 373 374 bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; } 375 376 bool AMDGPURegBankCombinerImpl::getDX10Clamp() const { 377 return getMode().DX10Clamp; 378 } 379 380 bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const { 381 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE; 382 } 383 384 bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const { 385 return MI->getOpcode() == AMDGPU::G_FCONSTANT; 386 } 387 388 bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0, 389 MachineInstr *K1) const { 390 if (isFCst(K0) && isFCst(K1)) { 391 const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm(); 392 const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm(); 393 return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) || 394 (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0)); 395 } 396 return false; 397 } 398 399 class AMDGPURegBankCombinerInfo final : public CombinerInfo { 400 GISelKnownBits *KB; 401 MachineDominatorTree *MDT; 402 AMDGPURegBankCombinerImplRuleConfig RuleConfig; 403 404 public: 405 AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 406 const AMDGPULegalizerInfo *LI, GISelKnownBits *KB, 407 MachineDominatorTree *MDT) 408 : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, 409 /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), 410 KB(KB), MDT(MDT) { 411 if (!RuleConfig.parseCommandLineOption()) 412 report_fatal_error("Invalid rule identifier"); 413 } 414 415 bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 416 MachineIRBuilder &B) const override; 417 }; 418 419 bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer, 420 MachineInstr &MI, 421 MachineIRBuilder &B) const { 422 CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB, MDT); 423 // TODO: Do not re-create the Impl on every inst, it should be per function. 424 AMDGPURegBankCombinerImpl Impl(RuleConfig, B, Helper, Observer); 425 Impl.setupMF(*MI.getMF(), KB); 426 return Impl.tryCombineAll(MI); 427 } 428 429 // Pass boilerplate 430 // ================ 431 432 class AMDGPURegBankCombiner : public MachineFunctionPass { 433 public: 434 static char ID; 435 436 AMDGPURegBankCombiner(bool IsOptNone = false); 437 438 StringRef getPassName() const override { return "AMDGPURegBankCombiner"; } 439 440 bool runOnMachineFunction(MachineFunction &MF) override; 441 442 void getAnalysisUsage(AnalysisUsage &AU) const override; 443 private: 444 bool IsOptNone; 445 }; 446 } // end anonymous namespace 447 448 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 449 AU.addRequired<TargetPassConfig>(); 450 AU.setPreservesCFG(); 451 getSelectionDAGFallbackAnalysisUsage(AU); 452 AU.addRequired<GISelKnownBitsAnalysis>(); 453 AU.addPreserved<GISelKnownBitsAnalysis>(); 454 if (!IsOptNone) { 455 AU.addRequired<MachineDominatorTree>(); 456 AU.addPreserved<MachineDominatorTree>(); 457 } 458 MachineFunctionPass::getAnalysisUsage(AU); 459 } 460 461 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone) 462 : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 463 initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry()); 464 } 465 466 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) { 467 if (MF.getProperties().hasProperty( 468 MachineFunctionProperties::Property::FailedISel)) 469 return false; 470 auto *TPC = &getAnalysis<TargetPassConfig>(); 471 const Function &F = MF.getFunction(); 472 bool EnableOpt = 473 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 474 475 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 476 const AMDGPULegalizerInfo *LI = 477 static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo()); 478 479 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 480 MachineDominatorTree *MDT = 481 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 482 AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), F.hasMinSize(), 483 LI, KB, MDT); 484 Combiner C(PCInfo, TPC); 485 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 486 } 487 488 char AMDGPURegBankCombiner::ID = 0; 489 INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE, 490 "Combine AMDGPU machine instrs after regbankselect", 491 false, false) 492 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 493 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 494 INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE, 495 "Combine AMDGPU machine instrs after regbankselect", false, 496 false) 497 498 namespace llvm { 499 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) { 500 return new AMDGPURegBankCombiner(IsOptNone); 501 } 502 } // end namespace llvm 503