//=== lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and sign extension instructions to generate
  // signed buffer load instructions.
  bool matchCombineSignExtendInReg(MachineInstr &MI,
                                   MachineInstr *&MatchInfo) const;
  void applyCombineSignExtendInReg(MachineInstr &MI,
                                   MachineInstr *&MatchInfo) const;

  // Find the s_mul_u64 instructions where the higher bits are either
  // zero-extended or sign-extended.
  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;
  // Replace the s_mul_u64 instruction with G_AMDGPU_S_MUL_I64_I32 if the
  // higher 33 bits are sign-extended and with G_AMDGPU_S_MUL_U64_U32 if the
  // higher 32 bits are zero-extended.
  void applyCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}
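
// A rough sketch of the shift combine above (register names are invented and
// this is not exact MIR): with a constant shift amount of at least 32,
// tryCombineShiftToUnmerge(MI, 32) reduces the 64-bit shift to a 32-bit one.
// For a logical right shift of an s64 value %x by 40:
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
//   %s:_(s32) = G_LSHR %hi, 8            ; 40 - 32
//   %zero:_(s32) = G_CONSTANT i32 0
//   %r:_(s64) = G_MERGE_VALUES %s, %zero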

bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // TODO: Handle case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}
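
// Note that only less-than / greater-than style predicates (OLT, OLE, OGT,
// OGE and their unordered counterparts) survive the switch above; equality,
// inequality and the pure ordered/unordered checks have no min/max
// equivalent, so they are rejected.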

void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with NaN,
    // so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}
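
// An illustrative example of the combine above (register names are invented):
//   %c:_(s1) = G_FCMP floatpred(ult), %x:_(s32), %y:_(s32)
//   %r:_(s32) = G_SELECT %c, %x, %y
// becomes
//   %r:_(s32) = G_AMDGPU_FMIN_LEGACY %y, %x
// The swapped operand order preserves the legacy (non-IEEE) NaN behavior of
// the hardware min/max instructions for the unordered compare.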

bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}
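
// An illustrative example of the combine above (register names are invented):
// when known bits prove that only the low 8 bits of %x can be nonzero,
//   %f:_(s32) = G_UITOFP %x:_(s32)
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %x
// which can later be selected to a single V_CVT_F32_UBYTE0 instead of a full
// integer-to-float conversion.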

bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }
  return false;
}
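
// An illustrative example of the combine above (register names are invented):
// when both instructions carry the contract fast-math flag,
//   %s:_(s32) = G_FSQRT %x:_(s32)
//   %r:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %s
// (or the same pair with rcp feeding sqrt) is rebuilt as
//   %r:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x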

bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}
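
// An illustrative example of the combine above (register names are invented):
// a constant right shift is folded into the byte index, so
//   %s:_(s32) = G_LSHR %x:_(s32), 16
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %s
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %x
// The opcode encodes which source byte is converted, so the shift can be
// absorbed as long as the resulting byte index stays within the 32-bit source.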

bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}
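
// If the source of the G_FCANONICALIZE is already in canonical form, the
// canonicalize is a no-op. The source register is handed back so the
// TableGen-erated apply step can simply replace all uses of the result with
// it.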

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.

// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
  Register Op0Reg = MI.getOperand(1).getReg();
  SubwordBufferLoad = MRI.getVRegDef(Op0Reg);

  if (!MRI.hasOneNonDBGUse(Op0Reg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
         SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
  // Modify the opcode and the destination of buffer_load_{u8, u16}:
  // Replace the opcode.
  unsigned Opc =
      SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE
          ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE
          : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
  SubwordBufferLoad->setDesc(TII.get(Opc));
  // Update the destination register of SubwordBufferLoad with the destination
  // register of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}
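
// An illustrative example of the combine above (register names are invented,
// and the buffer load operands are elided):
//   %l:_(s32) = G_AMDGPU_BUFFER_LOAD_UBYTE %rsrc, %vindex, %voffset, ...
//   %s:_(s32) = G_SEXT_INREG %l, 8
// becomes a single sign-extending load
//   %s:_(s32) = G_AMDGPU_BUFFER_LOAD_SBYTE %rsrc, %vindex, %voffset, ...
// provided the unsigned load has no other non-debug uses.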

bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  if (MRI.getType(Src0) != LLT::scalar(64))
    return false;

  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (KB->computeNumSignBits(Src1) >= 33 &&
      KB->computeNumSignBits(Src0) >= 33) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Helper.replaceOpcodeWith(MI, NewOpcode);
}
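
// An illustrative example of the combine above (register names are invented):
// if known bits show both operands of a 64-bit G_MUL are zero-extended 32-bit
// values,
//   %p:_(s64) = G_MUL %a:_(s64), %b:_(s64)
// is retagged as
//   %p:_(s64) = G_AMDGPU_S_MUL_U64_U32 %a, %b
// (and as G_AMDGPU_S_MUL_I64_I32 when at least 33 sign bits are known), which
// lets later stages use the cheaper 32 x 32 -> 64 multiply forms.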

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());

  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm