xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 //=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after register banks are known.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "llvm/CodeGen/GlobalISel/Combiner.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
22 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
23 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/MachineDominators.h"
27 #include "llvm/CodeGen/TargetPassConfig.h"
28 #include "llvm/IR/IntrinsicsAMDGPU.h"
29 #include "llvm/Target/TargetMachine.h"
30 
31 #define GET_GICOMBINER_DEPS
32 #include "AMDGPUGenPreLegalizeGICombiner.inc"
33 #undef GET_GICOMBINER_DEPS
34 
35 #define DEBUG_TYPE "amdgpu-regbank-combiner"
36 
37 using namespace llvm;
38 using namespace MIPatternMatch;
39 
40 namespace {
41 #define GET_GICOMBINER_TYPES
42 #include "AMDGPUGenRegBankGICombiner.inc"
43 #undef GET_GICOMBINER_TYPES
44 
45 class AMDGPURegBankCombinerImpl : public Combiner {
46 protected:
47   const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
48   const GCNSubtarget &STI;
49   const RegisterBankInfo &RBI;
50   const TargetRegisterInfo &TRI;
51   const SIInstrInfo &TII;
52   // TODO: Make CombinerHelper methods const.
53   mutable CombinerHelper Helper;
54 
55 public:
56   AMDGPURegBankCombinerImpl(
57       MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
58       GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
59       const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
60       const GCNSubtarget &STI, MachineDominatorTree *MDT,
61       const LegalizerInfo *LI);
62 
63   static const char *getName() { return "AMDGPURegBankCombinerImpl"; }
64 
65   bool tryCombineAll(MachineInstr &I) const override;
66 
67   bool isVgprRegBank(Register Reg) const;
68   Register getAsVgpr(Register Reg) const;
69 
70   struct MinMaxMedOpc {
71     unsigned Min, Max, Med;
72   };
73 
74   struct Med3MatchInfo {
75     unsigned Opc;
76     Register Val0, Val1, Val2;
77   };
78 
79   MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
80 
81   template <class m_Cst, typename CstTy>
82   bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
83                 Register &Val, CstTy &K0, CstTy &K1) const;
84 
85   bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
86   bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
87   bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const;
88   bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const;
89   void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
90   void applyClamp(MachineInstr &MI, Register &Reg) const;
91 
92 private:
93   SIModeRegisterDefaults getMode() const;
94   bool getIEEE() const;
95   bool getDX10Clamp() const;
96   bool isFminnumIeee(const MachineInstr &MI) const;
97   bool isFCst(MachineInstr *MI) const;
98   bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1) const;
99 
100 #define GET_GICOMBINER_CLASS_MEMBERS
101 #define AMDGPUSubtarget GCNSubtarget
102 #include "AMDGPUGenRegBankGICombiner.inc"
103 #undef GET_GICOMBINER_CLASS_MEMBERS
104 #undef AMDGPUSubtarget
105 };
106 
107 #define GET_GICOMBINER_IMPL
108 #define AMDGPUSubtarget GCNSubtarget
109 #include "AMDGPUGenRegBankGICombiner.inc"
110 #undef AMDGPUSubtarget
111 #undef GET_GICOMBINER_IMPL
112 
// Construct the combiner implementation for \p MF.
//
// The register bank, register and instruction info objects are all taken from
// \p STI. The CombinerHelper is created in post-legalize mode (IsPreLegalize is
// false) and receives the known-bits analysis, the optional dominator tree
// \p MDT and the legalizer info \p LI.
AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      RBI(*STI.getRegBankInfo()), TRI(*STI.getRegisterInfo()),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
// The remaining member initializers are supplied by the generated include; the
// initializer list above intentionally ends with a trailing comma.
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
127 
128 bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {
129   return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
130 }
131 
132 Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
133   if (isVgprRegBank(Reg))
134     return Reg;
135 
136   // Search for existing copy of Reg to vgpr.
137   for (MachineInstr &Use : MRI.use_instructions(Reg)) {
138     Register Def = Use.getOperand(0).getReg();
139     if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
140       return Def;
141   }
142 
143   // Copy Reg to vgpr.
144   Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
145   MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
146   return VgprReg;
147 }
148 
149 AMDGPURegBankCombinerImpl::MinMaxMedOpc
150 AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
151   switch (Opc) {
152   default:
153     llvm_unreachable("Unsupported opcode");
154   case AMDGPU::G_SMAX:
155   case AMDGPU::G_SMIN:
156     return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
157   case AMDGPU::G_UMAX:
158   case AMDGPU::G_UMIN:
159     return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
160   case AMDGPU::G_FMAXNUM:
161   case AMDGPU::G_FMINNUM:
162     return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
163   case AMDGPU::G_FMAXNUM_IEEE:
164   case AMDGPU::G_FMINNUM_IEEE:
165     return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
166             AMDGPU::G_AMDGPU_FMED3};
167   }
168 }
169 
170 template <class m_Cst, typename CstTy>
171 bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
172                                          MachineRegisterInfo &MRI,
173                                          MinMaxMedOpc MMMOpc, Register &Val,
174                                          CstTy &K0, CstTy &K1) const {
175   // 4 operand commutes of: min(max(Val, K0), K1).
176   // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
177   // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
178   // 4 operand commutes of: max(min(Val, K1), K0).
179   // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
180   // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
181   return mi_match(
182       MI, MRI,
183       m_any_of(
184           m_CommutativeBinOp(
185               MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
186               m_Cst(K1)),
187           m_CommutativeBinOp(
188               MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
189               m_Cst(K0))));
190 }
191 
192 bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
193     MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
194   Register Dst = MI.getOperand(0).getReg();
195   if (!isVgprRegBank(Dst))
196     return false;
197 
198   // med3 for i16 is only available on gfx9+, and not available for v2i16.
199   LLT Ty = MRI.getType(Dst);
200   if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
201     return false;
202 
203   MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
204   Register Val;
205   std::optional<ValueAndVReg> K0, K1;
206   // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
207   if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
208     return false;
209 
210   if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
211     return false;
212   if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
213     return false;
214 
215   MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
216   return true;
217 }
218 
219 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
220 // ieee = true  : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
221 // ieee = false : min/max(NaN, K) = K
222 // clamp(NaN) = dx10_clamp ? 0.0 : NaN
223 // Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
224 // Other operand commutes (see matchMed) give same result since min and max are
225 // commutative.
226 
227 // Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), KO<=K1
228 // with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
229 // Val = SNaN only for ieee = true
230 // fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
231 // min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
232 // max(min(SNaN, K1), K0) = max(K1, K0) = K1
233 // Val = NaN,ieee = false or Val = QNaN,ieee = true
234 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
235 // min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
236 // max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
237 bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
238     MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
239   Register Dst = MI.getOperand(0).getReg();
240   LLT Ty = MRI.getType(Dst);
241 
242   // med3 for f16 is only available on gfx9+, and not available for v2f16.
243   if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
244     return false;
245 
246   auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
247 
248   Register Val;
249   std::optional<FPValueAndVReg> K0, K1;
250   // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
251   if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
252     return false;
253 
254   if (K0->Value > K1->Value)
255     return false;
256 
257   // For IEEE=false perform combine only when it's safe to assume that there are
258   // no NaN inputs. Most often MI is marked with nnan fast math flag.
259   // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
260   // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
261   // nodes(max/min) have same behavior when one input is NaN and other isn't.
262   // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
263   // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
264   if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
265     // Don't fold single use constant that can't be inlined.
266     if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
267         (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
268       MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
269       return true;
270     }
271   }
272 
273   return false;
274 }
275 
276 bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
277                                                      Register &Reg) const {
278   // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
279   auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
280   Register Val;
281   std::optional<FPValueAndVReg> K0, K1;
282   // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
283   if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
284     return false;
285 
286   if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
287     return false;
288 
289   // For IEEE=false perform combine only when it's safe to assume that there are
290   // no NaN inputs. Most often MI is marked with nnan fast math flag.
291   // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
292   // to 0.0 requires dx10_clamp = true.
293   if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
294        isKnownNeverSNaN(Val, MRI)) ||
295       isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
296     Reg = Val;
297     return true;
298   }
299 
300   return false;
301 }
302 
303 // Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
304 // Val = SNaN only for ieee = true. It is important which operand is NaN.
305 // min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
306 // min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
307 // min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
308 // Val = NaN,ieee = false or Val = QNaN,ieee = true
309 // min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
310 // min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
311 // min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
312 bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
313                                                    Register &Reg) const {
314   // In llvm-ir, clamp is often represented as an intrinsic call to
315   // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
316   MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
317   MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
318   MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
319 
320   if (isFCst(Src0) && !isFCst(Src1))
321     std::swap(Src0, Src1);
322   if (isFCst(Src1) && !isFCst(Src2))
323     std::swap(Src1, Src2);
324   if (isFCst(Src0) && !isFCst(Src1))
325     std::swap(Src0, Src1);
326   if (!isClampZeroToOne(Src1, Src2))
327     return false;
328 
329   Register Val = Src0->getOperand(0).getReg();
330 
331   auto isOp3Zero = [&]() {
332     MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
333     if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
334       return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
335     return false;
336   };
337   // For IEEE=false perform combine only when it's safe to assume that there are
338   // no NaN inputs. Most often MI is marked with nnan fast math flag.
339   // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
340   // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
341   if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
342       (getIEEE() && getDX10Clamp() &&
343        (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
344     Reg = Val;
345     return true;
346   }
347 
348   return false;
349 }
350 
351 void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,
352                                            Register &Reg) const {
353   B.setInstrAndDebugLoc(MI);
354   B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
355                MI.getFlags());
356   MI.eraseFromParent();
357 }
358 
359 void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
360                                           Med3MatchInfo &MatchInfo) const {
361   B.setInstrAndDebugLoc(MI);
362   B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
363                {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
364                 getAsVgpr(MatchInfo.Val2)},
365                MI.getFlags());
366   MI.eraseFromParent();
367 }
368 
369 SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
370   return MF.getInfo<SIMachineFunctionInfo>()->getMode();
371 }
372 
373 bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }
374 
375 bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {
376   return getMode().DX10Clamp;
377 }
378 
379 bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {
380   return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
381 }
382 
383 bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {
384   return MI->getOpcode() == AMDGPU::G_FCONSTANT;
385 }
386 
387 bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
388                                                  MachineInstr *K1) const {
389   if (isFCst(K0) && isFCst(K1)) {
390     const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
391     const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
392     return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
393            (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
394   }
395   return false;
396 }
397 
398 // Pass boilerplate
399 // ================
400 
401 class AMDGPURegBankCombiner : public MachineFunctionPass {
402 public:
403   static char ID;
404 
405   AMDGPURegBankCombiner(bool IsOptNone = false);
406 
407   StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }
408 
409   bool runOnMachineFunction(MachineFunction &MF) override;
410 
411   void getAnalysisUsage(AnalysisUsage &AU) const override;
412 
413 private:
414   bool IsOptNone;
415   AMDGPURegBankCombinerImplRuleConfig RuleConfig;
416 };
417 } // end anonymous namespace
418 
419 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
420   AU.addRequired<TargetPassConfig>();
421   AU.setPreservesCFG();
422   getSelectionDAGFallbackAnalysisUsage(AU);
423   AU.addRequired<GISelKnownBitsAnalysis>();
424   AU.addPreserved<GISelKnownBitsAnalysis>();
425   if (!IsOptNone) {
426     AU.addRequired<MachineDominatorTree>();
427     AU.addPreserved<MachineDominatorTree>();
428   }
429   MachineFunctionPass::getAnalysisUsage(AU);
430 }
431 
432 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
433     : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
434   initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
435 
436   if (!RuleConfig.parseCommandLineOption())
437     report_fatal_error("Invalid rule identifier");
438 }
439 
440 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
441   if (MF.getProperties().hasProperty(
442           MachineFunctionProperties::Property::FailedISel))
443     return false;
444   auto *TPC = &getAnalysis<TargetPassConfig>();
445   const Function &F = MF.getFunction();
446   bool EnableOpt =
447       MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
448 
449   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
450   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
451 
452   const auto *LI = ST.getLegalizerInfo();
453   MachineDominatorTree *MDT =
454       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
455 
456   CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
457                      LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
458   AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
459                                  RuleConfig, ST, MDT, LI);
460   return Impl.combineMachineInstrs();
461 }
462 
// Storage for the pass ID that the constructor hands to MachineFunctionPass.
char AMDGPURegBankCombiner::ID = 0;
// Pass registration under DEBUG_TYPE ("amdgpu-regbank-combiner"), listing
// TargetPassConfig and GISelKnownBitsAnalysis as dependencies.
INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after regbankselect",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after regbankselect", false,
                    false)
472 
473 namespace llvm {
474 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
475   return new AMDGPURegBankCombiner(IsOptNone);
476 }
477 } // end namespace llvm
478