//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, MachineInstr &FCmp,
                           FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinFMaxLegacy(MachineInstr &MI,
                                       const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;
  void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;

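  // Fold a constant shift into the byte index of a cvt_f32_ubyteN. A sketch
  // of the intended transform (illustrative MIR, not taken from a test):
  //   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 (G_LSHR %x:_(s32), 16)
  // becomes
  //   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %x:_(s32)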
  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and signed extension instructions to generate
  // signed buffer load instructions.
  bool matchCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
  void applyCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;

  // Find the s_mul_u64 instructions where the higher bits are either
  // zero-extended or sign-extended.
  // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher
  // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32
  // bits are zero extended.
  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

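// A sketch of the select/fcmp fold handled below (illustrative MIR):
//   %c:_(s1) = G_FCMP floatpred(olt), %x:_(s32), %y:_(s32)
//   %d:_(s32) = G_SELECT %c, %x, %y
// becomes
//   %d:_(s32) = G_AMDGPU_FMIN_LEGACY %x, %y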
bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, MachineInstr &FCmp, FMinFMaxLegacyInfo &Info) const {
  if (!MRI.hasOneNonDBGUse(FCmp.getOperand(0).getReg()))
    return false;

  Info.Pred =
      static_cast<CmpInst::Predicate>(FCmp.getOperand(1).getPredicate());
  Info.LHS = FCmp.getOperand(2).getReg();
  Info.RHS = FCmp.getOperand(3).getReg();
  Register True = MI.getOperand(2).getReg();
  Register False = MI.getOperand(3).getReg();

  // TODO: Handle case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if ((Info.LHS != True || Info.RHS != False) &&
      (Info.LHS != False || Info.RHS != True))
    return false;

  // Invert the predicate if necessary so that the apply function can assume
  // that the select operands are the same as the fcmp operands.
  // (select (fcmp P, L, R), R, L) -> (select (fcmp !P, L, R), L, R)
  if (Info.LHS != True)
    Info.Pred = CmpInst::getInversePredicate(Info.Pred);

  // Only match </<=/>=/> not ==/!= etc.
  return Info.Pred != CmpInst::getSwappedPredicate(Info.Pred);
}

void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  unsigned Opc = (Info.Pred & CmpInst::FCMP_OGT) ? AMDGPU::G_AMDGPU_FMAX_LEGACY
                                                 : AMDGPU::G_AMDGPU_FMIN_LEGACY;
  Register X = Info.LHS;
  Register Y = Info.RHS;
  if (Info.Pred == CmpInst::getUnorderedPredicate(Info.Pred)) {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    std::swap(X, Y);
  }

  B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());

  MI.eraseFromParent();
}

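// A sketch of the fold below (illustrative): when the upper bits of the
// G_UITOFP source are known zero, the conversion only sees byte 0.
//   %f:_(s32) = G_UITOFP %x:_(s32)   ; bits 8..31 of %x known zero
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %x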
bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

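// A sketch of the rcp/sqrt folds below (illustrative MIR; both instructions
// must carry the contract flag):
//   %s:_(s32) = contract G_FSQRT %x
//   %r:_(s32) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %s
// becomes
//   %r:_(s32) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x
// The sqrt(rcp(x)) form folds the same way, and the f16 fdiv variant below
// rewrites y / sqrt(x) into y * rsq(x).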
bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }
  return false;
}

bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
    MachineInstr &MI) const {
  Register Sqrt = MI.getOperand(2).getReg();
  return MRI.hasOneNonDBGUse(Sqrt);
}

void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
    MachineInstr &MI, const Register &X) const {
  Register Dst = MI.getOperand(0).getReg();
  Register Y = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  uint32_t Flags = MI.getFlags();
  Register RSQ = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {DstTy})
                     .addUse(X)
                     .setMIFlags(Flags)
                     .getReg(0);
  B.buildFMul(Dst, RSQ, Y, Flags);
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.
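// For example (illustrative MIR):
//   %v:_(s32) = G_AMDGPU_BUFFER_LOAD_UBYTE ...
//   %s:_(s32) = G_SEXT_INREG %v, 8
// becomes
//   %s:_(s32) = G_AMDGPU_BUFFER_LOAD_SBYTE ...
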
// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  Register LoadReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LoadReg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
  int64_t Width = MI.getOperand(2).getImm();
  switch (LoadMI->getOpcode()) {
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
    return Width == 16;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT};
    return Width == 16;
  }
  return false;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  auto [LoadMI, NewOpcode] = MatchData;
  LoadMI->setDesc(TII.get(NewOpcode));
  // Update the destination register of the load with the destination register
  // of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  LoadMI->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}

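// A sketch of the multiply narrowing (illustrative): if known bits prove both
// s64 operands fit in the low 32 bits, the 64-bit multiply can be done as a
// 32 x 32 -> 64 bit multiply.
//   %p:_(s64) = G_MUL %a:_(s64), %b:_(s64)  ; hi 32 bits of %a and %b zero
// becomes
//   %p:_(s64) = G_AMDGPU_S_MUL_U64_U32 %a, %b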
bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  if (MRI.getType(Src0) != LLT::scalar(64))
    return false;

  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (KB->computeNumSignBits(Src1) >= 33 &&
      KB->computeNumSignBits(Src0) >= 33) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }
  return false;
}

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr
                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());

  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm