xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp (revision 5fb307d29b364982acbde82cbf77db3cae486f8c)
1 //=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after register banks are known.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "llvm/CodeGen/GlobalISel/Combiner.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
22 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
23 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
24 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
25 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
26 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
27 #include "llvm/CodeGen/MachineDominators.h"
28 #include "llvm/CodeGen/TargetPassConfig.h"
29 #include "llvm/IR/IntrinsicsAMDGPU.h"
30 #include "llvm/Target/TargetMachine.h"
31 
32 #define GET_GICOMBINER_DEPS
33 #include "AMDGPUGenPreLegalizeGICombiner.inc"
34 #undef GET_GICOMBINER_DEPS
35 
36 #define DEBUG_TYPE "amdgpu-regbank-combiner"
37 
38 using namespace llvm;
39 using namespace MIPatternMatch;
40 
41 namespace {
42 #define GET_GICOMBINER_TYPES
43 #include "AMDGPUGenRegBankGICombiner.inc"
44 #undef GET_GICOMBINER_TYPES
45 
46 class AMDGPURegBankCombinerImpl : public GIMatchTableExecutor {
47 protected:
48   const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
49 
50   MachineIRBuilder &B;
51   MachineFunction &MF;
52   MachineRegisterInfo &MRI;
53   const GCNSubtarget &STI;
54   const RegisterBankInfo &RBI;
55   const TargetRegisterInfo &TRI;
56   const SIInstrInfo &TII;
57   CombinerHelper &Helper;
58   GISelChangeObserver &Observer;
59 
60 public:
61   AMDGPURegBankCombinerImpl(
62       const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
63       MachineIRBuilder &B, CombinerHelper &Helper,
64       GISelChangeObserver &Observer);
65 
66   static const char *getName() { return "AMDGPURegBankCombinerImpl"; }
67 
68   bool tryCombineAll(MachineInstr &I) const;
69 
70   bool isVgprRegBank(Register Reg) const;
71   Register getAsVgpr(Register Reg) const;
72 
73   struct MinMaxMedOpc {
74     unsigned Min, Max, Med;
75   };
76 
77   struct Med3MatchInfo {
78     unsigned Opc;
79     Register Val0, Val1, Val2;
80   };
81 
82   MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
83 
84   template <class m_Cst, typename CstTy>
85   bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
86                 Register &Val, CstTy &K0, CstTy &K1) const;
87 
88   bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
89   bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
90   bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const;
91   bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const;
92   void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
93   void applyClamp(MachineInstr &MI, Register &Reg) const;
94 
95 private:
96   SIModeRegisterDefaults getMode() const;
97   bool getIEEE() const;
98   bool getDX10Clamp() const;
99   bool isFminnumIeee(const MachineInstr &MI) const;
100   bool isFCst(MachineInstr *MI) const;
101   bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1) const;
102 
103 #define GET_GICOMBINER_CLASS_MEMBERS
104 #define AMDGPUSubtarget GCNSubtarget
105 #include "AMDGPUGenRegBankGICombiner.inc"
106 #undef GET_GICOMBINER_CLASS_MEMBERS
107 #undef AMDGPUSubtarget
108 };
109 
110 #define GET_GICOMBINER_IMPL
111 #define AMDGPUSubtarget GCNSubtarget
112 #include "AMDGPUGenRegBankGICombiner.inc"
113 #undef AMDGPUSubtarget
114 #undef GET_GICOMBINER_IMPL
115 
116 AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
117     const AMDGPURegBankCombinerImplRuleConfig &RuleConfig, MachineIRBuilder &B,
118     CombinerHelper &Helper, GISelChangeObserver &Observer)
119     : RuleConfig(RuleConfig), B(B), MF(B.getMF()), MRI(*B.getMRI()),
120       STI(MF.getSubtarget<GCNSubtarget>()), RBI(*STI.getRegBankInfo()),
121       TRI(*STI.getRegisterInfo()), TII(*STI.getInstrInfo()), Helper(Helper),
122       Observer(Observer),
123 #define GET_GICOMBINER_CONSTRUCTOR_INITS
124 #include "AMDGPUGenRegBankGICombiner.inc"
125 #undef GET_GICOMBINER_CONSTRUCTOR_INITS
126 {
127 }
128 
129 bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {
130   return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
131 }
132 
133 Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
134   if (isVgprRegBank(Reg))
135     return Reg;
136 
137   // Search for existing copy of Reg to vgpr.
138   for (MachineInstr &Use : MRI.use_instructions(Reg)) {
139     Register Def = Use.getOperand(0).getReg();
140     if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
141       return Def;
142   }
143 
144   // Copy Reg to vgpr.
145   Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
146   MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
147   return VgprReg;
148 }
149 
150 AMDGPURegBankCombinerImpl::MinMaxMedOpc
151 AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
152   switch (Opc) {
153   default:
154     llvm_unreachable("Unsupported opcode");
155   case AMDGPU::G_SMAX:
156   case AMDGPU::G_SMIN:
157     return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
158   case AMDGPU::G_UMAX:
159   case AMDGPU::G_UMIN:
160     return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
161   case AMDGPU::G_FMAXNUM:
162   case AMDGPU::G_FMINNUM:
163     return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
164   case AMDGPU::G_FMAXNUM_IEEE:
165   case AMDGPU::G_FMINNUM_IEEE:
166     return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
167             AMDGPU::G_AMDGPU_FMED3};
168   }
169 }
170 
171 template <class m_Cst, typename CstTy>
172 bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
173                                          MachineRegisterInfo &MRI,
174                                          MinMaxMedOpc MMMOpc, Register &Val,
175                                          CstTy &K0, CstTy &K1) const {
176   // 4 operand commutes of: min(max(Val, K0), K1).
177   // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
178   // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
179   // 4 operand commutes of: max(min(Val, K1), K0).
180   // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
181   // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
182   return mi_match(
183       MI, MRI,
184       m_any_of(
185           m_CommutativeBinOp(
186               MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
187               m_Cst(K1)),
188           m_CommutativeBinOp(
189               MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
190               m_Cst(K0))));
191 }
192 
193 bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
194     MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
195   Register Dst = MI.getOperand(0).getReg();
196   if (!isVgprRegBank(Dst))
197     return false;
198 
199   // med3 for i16 is only available on gfx9+, and not available for v2i16.
200   LLT Ty = MRI.getType(Dst);
201   if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
202     return false;
203 
204   MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
205   Register Val;
206   std::optional<ValueAndVReg> K0, K1;
207   // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
208   if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
209     return false;
210 
211   if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
212     return false;
213   if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
214     return false;
215 
216   MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
217   return true;
218 }
219 
220 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
221 // ieee = true  : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
222 // ieee = false : min/max(NaN, K) = K
223 // clamp(NaN) = dx10_clamp ? 0.0 : NaN
224 // Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
225 // Other operand commutes (see matchMed) give same result since min and max are
226 // commutative.
227 
228 // Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), KO<=K1
229 // with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
230 // Val = SNaN only for ieee = true
231 // fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
232 // min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
233 // max(min(SNaN, K1), K0) = max(K1, K0) = K1
234 // Val = NaN,ieee = false or Val = QNaN,ieee = true
235 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
236 // min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
237 // max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
238 bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
239     MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
240   Register Dst = MI.getOperand(0).getReg();
241   LLT Ty = MRI.getType(Dst);
242 
243   // med3 for f16 is only available on gfx9+, and not available for v2f16.
244   if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
245     return false;
246 
247   auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
248 
249   Register Val;
250   std::optional<FPValueAndVReg> K0, K1;
251   // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
252   if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
253     return false;
254 
255   if (K0->Value > K1->Value)
256     return false;
257 
258   // For IEEE=false perform combine only when it's safe to assume that there are
259   // no NaN inputs. Most often MI is marked with nnan fast math flag.
260   // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
261   // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
262   // nodes(max/min) have same behavior when one input is NaN and other isn't.
263   // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
264   // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
265   if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
266     // Don't fold single use constant that can't be inlined.
267     if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
268         (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
269       MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
270       return true;
271     }
272   }
273 
274   return false;
275 }
276 
277 bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
278                                                      Register &Reg) const {
279   // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
280   auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
281   Register Val;
282   std::optional<FPValueAndVReg> K0, K1;
283   // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
284   if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
285     return false;
286 
287   if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
288     return false;
289 
290   // For IEEE=false perform combine only when it's safe to assume that there are
291   // no NaN inputs. Most often MI is marked with nnan fast math flag.
292   // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
293   // to 0.0 requires dx10_clamp = true.
294   if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
295        isKnownNeverSNaN(Val, MRI)) ||
296       isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
297     Reg = Val;
298     return true;
299   }
300 
301   return false;
302 }
303 
304 // Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
305 // Val = SNaN only for ieee = true. It is important which operand is NaN.
306 // min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
307 // min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
308 // min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
309 // Val = NaN,ieee = false or Val = QNaN,ieee = true
310 // min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
311 // min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
312 // min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
313 bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
314                                                    Register &Reg) const {
315   // In llvm-ir, clamp is often represented as an intrinsic call to
316   // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
317   MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
318   MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
319   MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
320 
321   if (isFCst(Src0) && !isFCst(Src1))
322     std::swap(Src0, Src1);
323   if (isFCst(Src1) && !isFCst(Src2))
324     std::swap(Src1, Src2);
325   if (isFCst(Src0) && !isFCst(Src1))
326     std::swap(Src0, Src1);
327   if (!isClampZeroToOne(Src1, Src2))
328     return false;
329 
330   Register Val = Src0->getOperand(0).getReg();
331 
332   auto isOp3Zero = [&]() {
333     MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
334     if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
335       return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
336     return false;
337   };
338   // For IEEE=false perform combine only when it's safe to assume that there are
339   // no NaN inputs. Most often MI is marked with nnan fast math flag.
340   // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
341   // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
342   if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
343       (getIEEE() && getDX10Clamp() &&
344        (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
345     Reg = Val;
346     return true;
347   }
348 
349   return false;
350 }
351 
352 void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,
353                                            Register &Reg) const {
354   B.setInstrAndDebugLoc(MI);
355   B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
356                MI.getFlags());
357   MI.eraseFromParent();
358 }
359 
360 void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
361                                           Med3MatchInfo &MatchInfo) const {
362   B.setInstrAndDebugLoc(MI);
363   B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
364                {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
365                 getAsVgpr(MatchInfo.Val2)},
366                MI.getFlags());
367   MI.eraseFromParent();
368 }
369 
370 SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
371   return MF.getInfo<SIMachineFunctionInfo>()->getMode();
372 }
373 
374 bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }
375 
376 bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {
377   return getMode().DX10Clamp;
378 }
379 
380 bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {
381   return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
382 }
383 
384 bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {
385   return MI->getOpcode() == AMDGPU::G_FCONSTANT;
386 }
387 
388 bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
389                                                  MachineInstr *K1) const {
390   if (isFCst(K0) && isFCst(K1)) {
391     const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
392     const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
393     return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
394            (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
395   }
396   return false;
397 }
398 
399 class AMDGPURegBankCombinerInfo final : public CombinerInfo {
400   GISelKnownBits *KB;
401   MachineDominatorTree *MDT;
402   AMDGPURegBankCombinerImplRuleConfig RuleConfig;
403 
404 public:
405   AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
406                             const AMDGPULegalizerInfo *LI, GISelKnownBits *KB,
407                             MachineDominatorTree *MDT)
408       : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
409                      /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
410         KB(KB), MDT(MDT) {
411     if (!RuleConfig.parseCommandLineOption())
412       report_fatal_error("Invalid rule identifier");
413   }
414 
415   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
416                MachineIRBuilder &B) const override;
417 };
418 
419 bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
420                                         MachineInstr &MI,
421                                         MachineIRBuilder &B) const {
422   CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB, MDT);
423   // TODO: Do not re-create the Impl on every inst, it should be per function.
424   AMDGPURegBankCombinerImpl Impl(RuleConfig, B, Helper, Observer);
425   Impl.setupMF(*MI.getMF(), KB);
426   return Impl.tryCombineAll(MI);
427 }
428 
429 // Pass boilerplate
430 // ================
431 
432 class AMDGPURegBankCombiner : public MachineFunctionPass {
433 public:
434   static char ID;
435 
436   AMDGPURegBankCombiner(bool IsOptNone = false);
437 
438   StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }
439 
440   bool runOnMachineFunction(MachineFunction &MF) override;
441 
442   void getAnalysisUsage(AnalysisUsage &AU) const override;
443 private:
444   bool IsOptNone;
445 };
446 } // end anonymous namespace
447 
448 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
449   AU.addRequired<TargetPassConfig>();
450   AU.setPreservesCFG();
451   getSelectionDAGFallbackAnalysisUsage(AU);
452   AU.addRequired<GISelKnownBitsAnalysis>();
453   AU.addPreserved<GISelKnownBitsAnalysis>();
454   if (!IsOptNone) {
455     AU.addRequired<MachineDominatorTree>();
456     AU.addPreserved<MachineDominatorTree>();
457   }
458   MachineFunctionPass::getAnalysisUsage(AU);
459 }
460 
461 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
462     : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
463   initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
464 }
465 
466 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
467   if (MF.getProperties().hasProperty(
468           MachineFunctionProperties::Property::FailedISel))
469     return false;
470   auto *TPC = &getAnalysis<TargetPassConfig>();
471   const Function &F = MF.getFunction();
472   bool EnableOpt =
473       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
474 
475   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
476   const AMDGPULegalizerInfo *LI =
477       static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
478 
479   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
480   MachineDominatorTree *MDT =
481       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
482   AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), F.hasMinSize(),
483                                    LI, KB, MDT);
484   Combiner C(PCInfo, TPC);
485   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
486 }
487 
488 char AMDGPURegBankCombiner::ID = 0;
489 INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
490                       "Combine AMDGPU machine instrs after regbankselect",
491                       false, false)
492 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
493 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
494 INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
495                     "Combine AMDGPU machine instrs after regbankselect", false,
496                     false)
497 
498 namespace llvm {
499 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
500   return new AMDGPURegBankCombiner(IsOptNone);
501 }
502 } // end namespace llvm
503