//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
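  // Source value feeding the conversion and the bit offset of the byte it
  // reads; used to pick the G_AMDGPU_CVT_F32_UBYTEn variant in the apply.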
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and sign extension instructions to generate
  // signed buffer load instructions.
  bool matchCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
  void applyCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;

  // Find the s_mul_u64 instructions where the higher bits are either
  // zero-extended or sign-extended.
  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;
  // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher
  // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32
  // bits are zero extended.
  void applyCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // TODO: Handle case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
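  // The select operands must be exactly the compare operands, possibly
  // swapped; otherwise this is not a min/max pattern.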
  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
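  // Only combine when every bit above the low byte of the source is known to
  // be zero, so the conversion is equivalent to converting a single unsigned
  // byte.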
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }
  return false;
}

bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
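  // If the conversion source is zero-extended, rebind SrcReg to the value
  // being extended so the shift can be matched underneath the extension.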
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.

// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  Register LoadReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LoadReg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
  int64_t Width = MI.getOperand(2).getImm();
  switch (LoadMI->getOpcode()) {
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
    return Width == 16;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT};
    return Width == 16;
  }
  return false;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  auto [LoadMI, NewOpcode] = MatchData;
  LoadMI->setDesc(TII.get(NewOpcode));
  // Update the destination register of the load with the destination register
  // of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  LoadMI->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  if (MRI.getType(Src0) != LLT::scalar(64))
    return false;

  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (KB->computeNumSignBits(Src1) >= 33 &&
      KB->computeNumSignBits(Src0) >= 33) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Helper.replaceOpcodeWith(MI, NewOpcode);
}

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());

  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm