xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp (revision 02e9120893770924227138ba49df1edb3896112a)
1 //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after the legalizer.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUCombinerHelper.h"
16 #include "AMDGPULegalizerInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "llvm/CodeGen/GlobalISel/Combiner.h"
20 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
22 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
23 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/MachineDominators.h"
27 #include "llvm/CodeGen/TargetPassConfig.h"
28 #include "llvm/IR/IntrinsicsAMDGPU.h"
29 #include "llvm/Target/TargetMachine.h"
30 
31 #define GET_GICOMBINER_DEPS
32 #include "AMDGPUGenPreLegalizeGICombiner.inc"
33 #undef GET_GICOMBINER_DEPS
34 
35 #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
36 
37 using namespace llvm;
38 using namespace MIPatternMatch;
39 
40 namespace {
41 #define GET_GICOMBINER_TYPES
42 #include "AMDGPUGenPostLegalizeGICombiner.inc"
43 #undef GET_GICOMBINER_TYPES
44 
45 class AMDGPUPostLegalizerCombinerImpl : public GIMatchTableExecutor {
46 protected:
47   const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
48 
49   MachineIRBuilder &B;
50   MachineFunction &MF;
51   MachineRegisterInfo &MRI;
52   const GCNSubtarget &STI;
53   const SIInstrInfo &TII;
54   AMDGPUCombinerHelper &Helper;
55   GISelChangeObserver &Observer;
56 
57 public:
58   AMDGPUPostLegalizerCombinerImpl(
59       const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
60       MachineIRBuilder &B, AMDGPUCombinerHelper &Helper,
61       GISelChangeObserver &Observer);
62 
63   static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }
64 
65   bool tryCombineAll(MachineInstr &I) const;
66 
67   struct FMinFMaxLegacyInfo {
68     Register LHS;
69     Register RHS;
70     Register True;
71     Register False;
72     CmpInst::Predicate Pred;
73   };
74 
75   // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
76   bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;
77   void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
78                                          const FMinFMaxLegacyInfo &Info) const;
79 
80   bool matchUCharToFloat(MachineInstr &MI) const;
81   void applyUCharToFloat(MachineInstr &MI) const;
82 
83   bool
84   matchRcpSqrtToRsq(MachineInstr &MI,
85                     std::function<void(MachineIRBuilder &)> &MatchInfo) const;
86 
87   // FIXME: Should be able to have 2 separate matchdatas rather than custom
88   // struct boilerplate.
89   struct CvtF32UByteMatchInfo {
90     Register CvtVal;
91     unsigned ShiftOffset;
92   };
93 
94   bool matchCvtF32UByteN(MachineInstr &MI,
95                          CvtF32UByteMatchInfo &MatchInfo) const;
96   void applyCvtF32UByteN(MachineInstr &MI,
97                          const CvtF32UByteMatchInfo &MatchInfo) const;
98 
99   bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;
100 
101   // Combine unsigned buffer load and signed extension instructions to generate
102   // signed buffer laod instructions.
103   bool matchCombineSignExtendInReg(MachineInstr &MI,
104                                    MachineInstr *&MatchInfo) const;
105   void applyCombineSignExtendInReg(MachineInstr &MI,
106                                    MachineInstr *&MatchInfo) const;
107 
108 private:
109 #define GET_GICOMBINER_CLASS_MEMBERS
110 #define AMDGPUSubtarget GCNSubtarget
111 #include "AMDGPUGenPostLegalizeGICombiner.inc"
112 #undef GET_GICOMBINER_CLASS_MEMBERS
113 #undef AMDGPUSubtarget
114 };
115 
116 #define GET_GICOMBINER_IMPL
117 #define AMDGPUSubtarget GCNSubtarget
118 #include "AMDGPUGenPostLegalizeGICombiner.inc"
119 #undef AMDGPUSubtarget
120 #undef GET_GICOMBINER_IMPL
121 
122 AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
123     const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
124     MachineIRBuilder &B, AMDGPUCombinerHelper &Helper,
125     GISelChangeObserver &Observer)
126     : RuleConfig(RuleConfig), B(B), MF(B.getMF()), MRI(*B.getMRI()),
127       STI(MF.getSubtarget<GCNSubtarget>()), TII(*STI.getInstrInfo()),
128       Helper(Helper), Observer(Observer),
129 #define GET_GICOMBINER_CONSTRUCTOR_INITS
130 #include "AMDGPUGenPostLegalizeGICombiner.inc"
131 #undef GET_GICOMBINER_CONSTRUCTOR_INITS
132 {
133 }
134 
135 bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
136     MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
137   // FIXME: Type predicate on pattern
138   if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
139     return false;
140 
141   Register Cond = MI.getOperand(1).getReg();
142   if (!MRI.hasOneNonDBGUse(Cond) ||
143       !mi_match(Cond, MRI,
144                 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
145     return false;
146 
147   Info.True = MI.getOperand(2).getReg();
148   Info.False = MI.getOperand(3).getReg();
149 
150   // TODO: Handle case where the the selected value is an fneg and the compared
151   // constant is the negation of the selected value.
152   if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
153       !(Info.LHS == Info.False && Info.RHS == Info.True))
154     return false;
155 
156   switch (Info.Pred) {
157   case CmpInst::FCMP_FALSE:
158   case CmpInst::FCMP_OEQ:
159   case CmpInst::FCMP_ONE:
160   case CmpInst::FCMP_ORD:
161   case CmpInst::FCMP_UNO:
162   case CmpInst::FCMP_UEQ:
163   case CmpInst::FCMP_UNE:
164   case CmpInst::FCMP_TRUE:
165     return false;
166   default:
167     return true;
168   }
169 }
170 
171 void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
172     MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
173   B.setInstrAndDebugLoc(MI);
174   auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
175     B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
176   };
177 
178   switch (Info.Pred) {
179   case CmpInst::FCMP_ULT:
180   case CmpInst::FCMP_ULE:
181     if (Info.LHS == Info.True)
182       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
183     else
184       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
185     break;
186   case CmpInst::FCMP_OLE:
187   case CmpInst::FCMP_OLT: {
188     // We need to permute the operands to get the correct NaN behavior. The
189     // selected operand is the second one based on the failing compare with NaN,
190     // so permute it based on the compare type the hardware uses.
191     if (Info.LHS == Info.True)
192       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
193     else
194       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
195     break;
196   }
197   case CmpInst::FCMP_UGE:
198   case CmpInst::FCMP_UGT: {
199     if (Info.LHS == Info.True)
200       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
201     else
202       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
203     break;
204   }
205   case CmpInst::FCMP_OGT:
206   case CmpInst::FCMP_OGE: {
207     if (Info.LHS == Info.True)
208       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
209     else
210       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
211     break;
212   }
213   default:
214     llvm_unreachable("predicate should not have matched");
215   }
216 
217   MI.eraseFromParent();
218 }
219 
220 bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
221     MachineInstr &MI) const {
222   Register DstReg = MI.getOperand(0).getReg();
223 
224   // TODO: We could try to match extracting the higher bytes, which would be
225   // easier if i8 vectors weren't promoted to i32 vectors, particularly after
226   // types are legalized. v4i8 -> v4f32 is probably the only case to worry
227   // about in practice.
228   LLT Ty = MRI.getType(DstReg);
229   if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
230     Register SrcReg = MI.getOperand(1).getReg();
231     unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
232     assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
233     const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
234     return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
235   }
236 
237   return false;
238 }
239 
240 void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
241     MachineInstr &MI) const {
242   B.setInstrAndDebugLoc(MI);
243 
244   const LLT S32 = LLT::scalar(32);
245 
246   Register DstReg = MI.getOperand(0).getReg();
247   Register SrcReg = MI.getOperand(1).getReg();
248   LLT Ty = MRI.getType(DstReg);
249   LLT SrcTy = MRI.getType(SrcReg);
250   if (SrcTy != S32)
251     SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
252 
253   if (Ty == S32) {
254     B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
255                  MI.getFlags());
256   } else {
257     auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
258                              MI.getFlags());
259     B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
260   }
261 
262   MI.eraseFromParent();
263 }
264 
265 bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
266     MachineInstr &MI,
267     std::function<void(MachineIRBuilder &)> &MatchInfo) const {
268 
269   auto getRcpSrc = [=](const MachineInstr &MI) {
270     MachineInstr *ResMI = nullptr;
271     if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
272         MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
273       ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
274 
275     return ResMI;
276   };
277 
278   auto getSqrtSrc = [=](const MachineInstr &MI) {
279     MachineInstr *SqrtSrcMI = nullptr;
280     auto Match =
281         mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
282     (void)Match;
283     return SqrtSrcMI;
284   };
285 
286   MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
287   // rcp(sqrt(x))
288   if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
289     MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
290       B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
291           .addUse(SqrtSrcMI->getOperand(0).getReg())
292           .setMIFlags(MI.getFlags());
293     };
294     return true;
295   }
296 
297   // sqrt(rcp(x))
298   if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
299     MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
300       B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
301           .addUse(RcpSrcMI->getOperand(0).getReg())
302           .setMIFlags(MI.getFlags());
303     };
304     return true;
305   }
306 
307   return false;
308 }
309 
310 bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
311     MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
312   Register SrcReg = MI.getOperand(1).getReg();
313 
314   // Look through G_ZEXT.
315   bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
316 
317   Register Src0;
318   int64_t ShiftAmt;
319   IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
320   if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
321     const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
322 
323     unsigned ShiftOffset = 8 * Offset;
324     if (IsShr)
325       ShiftOffset += ShiftAmt;
326     else
327       ShiftOffset -= ShiftAmt;
328 
329     MatchInfo.CvtVal = Src0;
330     MatchInfo.ShiftOffset = ShiftOffset;
331     return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
332   }
333 
334   // TODO: Simplify demanded bits.
335   return false;
336 }
337 
338 void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
339     MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
340   B.setInstrAndDebugLoc(MI);
341   unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
342 
343   const LLT S32 = LLT::scalar(32);
344   Register CvtSrc = MatchInfo.CvtVal;
345   LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
346   if (SrcTy != S32) {
347     assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
348     CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
349   }
350 
351   assert(MI.getOpcode() != NewOpc);
352   B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
353   MI.eraseFromParent();
354 }
355 
356 bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
357     MachineInstr &MI, Register &Reg) const {
358   const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
359       MF.getSubtarget().getTargetLowering());
360   Reg = MI.getOperand(1).getReg();
361   return TLI->isCanonicalized(Reg, MF);
362 }
363 
364 // The buffer_load_{i8, i16} intrinsics are intially lowered as buffer_load_{u8,
365 // u16} instructions. Here, the buffer_load_{u8, u16} instructions are combined
366 // with sign extension instrucions in order to generate buffer_load_{i8, i16}
367 // instructions.
368 
369 // Identify buffer_load_{u8, u16}.
370 bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
371     MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
372   Register Op0Reg = MI.getOperand(1).getReg();
373   SubwordBufferLoad = MRI.getVRegDef(Op0Reg);
374 
375   if (!MRI.hasOneNonDBGUse(Op0Reg))
376     return false;
377 
378   // Check if the first operand of the sign extension is a subword buffer load
379   // instruction.
380   return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
381          SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
382 }
383 
384 // Combine buffer_load_{u8, u16} and the sign extension instruction to generate
385 // buffer_load_{i8, i16}.
386 void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
387     MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
388   // Modify the opcode and the destination of buffer_load_{u8, u16}:
389   // Replace the opcode.
390   unsigned Opc =
391       SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE
392           ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE
393           : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
394   SubwordBufferLoad->setDesc(TII.get(Opc));
395   // Update the destination register of SubwordBufferLoad with the destination
396   // register of the sign extension.
397   Register SignExtendInsnDst = MI.getOperand(0).getReg();
398   SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst);
399   // Remove the sign extension.
400   MI.eraseFromParent();
401 }
402 
403 class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
404   GISelKnownBits *KB;
405   MachineDominatorTree *MDT;
406   AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
407 
408 public:
409   AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
410                                   const AMDGPULegalizerInfo *LI,
411                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
412       : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
413                      /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
414         KB(KB), MDT(MDT) {
415     if (!RuleConfig.parseCommandLineOption())
416       report_fatal_error("Invalid rule identifier");
417   }
418 
419   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
420                MachineIRBuilder &B) const override;
421 };
422 
423 bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
424                                               MachineInstr &MI,
425                                               MachineIRBuilder &B) const {
426   AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT,
427                               LInfo);
428   // TODO: Do not re-create the Impl on every inst, it should be per function.
429   AMDGPUPostLegalizerCombinerImpl Impl(RuleConfig, B, Helper, Observer);
430   Impl.setupMF(*MI.getMF(), KB);
431 
432   if (Impl.tryCombineAll(MI))
433     return true;
434 
435   switch (MI.getOpcode()) {
436   case TargetOpcode::G_SHL:
437   case TargetOpcode::G_LSHR:
438   case TargetOpcode::G_ASHR:
439     // On some subtargets, 64-bit shift is a quarter rate instruction. In the
440     // common case, splitting this into a move and a 32-bit shift is faster and
441     // the same code size.
442     return Helper.tryCombineShiftToUnmerge(MI, 32);
443   }
444 
445   return false;
446 }
447 
448 // Pass boilerplate
449 // ================
450 
451 class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
452 public:
453   static char ID;
454 
455   AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
456 
457   StringRef getPassName() const override {
458     return "AMDGPUPostLegalizerCombiner";
459   }
460 
461   bool runOnMachineFunction(MachineFunction &MF) override;
462 
463   void getAnalysisUsage(AnalysisUsage &AU) const override;
464 private:
465   bool IsOptNone;
466 };
467 } // end anonymous namespace
468 
469 void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
470   AU.addRequired<TargetPassConfig>();
471   AU.setPreservesCFG();
472   getSelectionDAGFallbackAnalysisUsage(AU);
473   AU.addRequired<GISelKnownBitsAnalysis>();
474   AU.addPreserved<GISelKnownBitsAnalysis>();
475   if (!IsOptNone) {
476     AU.addRequired<MachineDominatorTree>();
477     AU.addPreserved<MachineDominatorTree>();
478   }
479   MachineFunctionPass::getAnalysisUsage(AU);
480 }
481 
482 AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
483     : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
484   initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
485 }
486 
487 bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
488   if (MF.getProperties().hasProperty(
489           MachineFunctionProperties::Property::FailedISel))
490     return false;
491   auto *TPC = &getAnalysis<TargetPassConfig>();
492   const Function &F = MF.getFunction();
493   bool EnableOpt =
494       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
495 
496   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
497   const AMDGPULegalizerInfo *LI =
498       static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
499 
500   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
501   MachineDominatorTree *MDT =
502       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
503   AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
504                                          F.hasMinSize(), LI, KB, MDT);
505   Combiner C(PCInfo, TPC);
506   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
507 }
508 
509 char AMDGPUPostLegalizerCombiner::ID = 0;
510 INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
511                       "Combine AMDGPU machine instrs after legalization", false,
512                       false)
513 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
514 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
515 INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
516                     "Combine AMDGPU machine instrs after legalization", false,
517                     false)
518 
519 namespace llvm {
520 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
521   return new AMDGPUPostLegalizerCombiner(IsOptNone);
522 }
523 } // end namespace llvm
524