//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPostLegalizerCombinerImpl : public GIMatchTableExecutor {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;

  MachineIRBuilder &B;
  MachineFunction &MF;
  MachineRegisterInfo &MRI;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  AMDGPUCombinerHelper &Helper;
  GISelChangeObserver &Observer;

public:
  AMDGPUPostLegalizerCombinerImpl(
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      MachineIRBuilder &B, AMDGPUCombinerHelper &Helper,
      GISelChangeObserver &Observer);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAll(MachineInstr &I) const;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;
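
  // An illustrative sketch (not from the original source) of the rewrite
  // matchRcpSqrtToRsq looks for, written as generic MIR; the value names
  // here are made up for the example:
  //   %sqrt:_(s32) = G_FSQRT %x
  //   %rcp:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %sqrt
  // is rebuilt as the single intrinsic
  //   %rcp:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x
  // and symmetrically for sqrt(rcp(x)).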

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and sign extension instructions to generate
  // signed buffer load instructions.
  bool matchCombineSignExtendInReg(MachineInstr &MI,
                                   MachineInstr *&MatchInfo) const;
  void applyCombineSignExtendInReg(MachineInstr &MI,
                                   MachineInstr *&MatchInfo) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    MachineIRBuilder &B, AMDGPUCombinerHelper &Helper,
    GISelChangeObserver &Observer)
    : RuleConfig(RuleConfig), B(B), MF(B.getMF()), MRI(*B.getMRI()),
      STI(MF.getSubtarget<GCNSubtarget>()), TII(*STI.getInstrInfo()),
      Helper(Helper), Observer(Observer),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
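
// An illustrative sketch (not from the original source) of one shape the
// select(fcmp) -> fmin/fmax legacy combine below handles; register names are
// made up for the example:
//   %cmp:_(s1) = G_FCMP floatpred(ult), %a:_(s32), %b:_(s32)
//   %sel:_(s32) = G_SELECT %cmp, %a, %b
// becomes
//   %sel:_(s32) = G_AMDGPU_FMIN_LEGACY %b, %a
// with the operands swapped where needed to preserve the legacy NaN behavior.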
bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // TODO: Handle case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}
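
// An illustrative sketch (not from the original source) of the uchar-to-float
// combine below; register names are made up for the example:
//   %c:_(s32) = G_CONSTANT i32 255
//   %b:_(s32) = G_AND %x, %c        ; upper 24 bits known zero
//   %f:_(s32) = G_UITOFP %b
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %b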
bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) {
    MachineInstr *ResMI = nullptr;
    if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
        MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
      ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());

    return ResMI;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) {
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  return false;
}
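
// An illustrative sketch (not from the original source) of the shift fold
// matchCvtF32UByteN performs; register names are made up for the example:
//   %k:_(s32) = G_CONSTANT i32 16
//   %hi:_(s32) = G_LSHR %x, %k
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %hi
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %x
// i.e. the byte index absorbs the shift amount (16 bits = 2 bytes).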
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.
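//
// An illustrative sketch (not from the original source) in generic MIR;
// register names are made up for the example and the load's trailing
// operands are elided:
//   %ld:_(s32) = G_AMDGPU_BUFFER_LOAD_UBYTE %rsrc, ...
//   %sext:_(s32) = G_SEXT_INREG %ld, 8
// becomes
//   %sext:_(s32) = G_AMDGPU_BUFFER_LOAD_SBYTE %rsrc, ...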

// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
  Register Op0Reg = MI.getOperand(1).getReg();
  SubwordBufferLoad = MRI.getVRegDef(Op0Reg);

  if (!MRI.hasOneNonDBGUse(Op0Reg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
         SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
  // Modify the opcode and the destination of buffer_load_{u8, u16}:
  // Replace the opcode.
  unsigned Opc =
      SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE
          ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE
          : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
  SubwordBufferLoad->setDesc(TII.get(Opc));
  // Update the destination register of SubwordBufferLoad with the destination
  // register of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}

class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;

public:
  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!RuleConfig.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT,
                              LInfo);
  // TODO: Do not re-create the Impl on every inst, it should be per function.
  AMDGPUPostLegalizerCombinerImpl Impl(RuleConfig, B, Helper, Observer);
  Impl.setupMF(*MI.getMF(), KB);

  if (Impl.tryCombineAll(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm