//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, MachineInstr &FCmp,
                           FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinFMaxLegacy(MachineInstr &MI,
                                       const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;
  void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and signed extension instructions to generate
  // signed buffer load instructions.
  bool matchCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
  void applyCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;

  // Find the s_mul_u64 instructions where the higher bits are either
  // zero-extended or sign-extended.
  // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher
  // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32
  // bits are zero extended.
  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

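// Try the generated combine rules first, then fall back to the manually
// implemented combines below.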
bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

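// Match a G_SELECT whose condition is a single-use G_FCMP of the same two
// values being selected. The operands and (possibly inverted) predicate are
// recorded so the apply step can emit a legacy min/max. Only the relational
// predicates (</<=/>/>=) are accepted.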
bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, MachineInstr &FCmp, FMinFMaxLegacyInfo &Info) const {
  if (!MRI.hasOneNonDBGUse(FCmp.getOperand(0).getReg()))
    return false;

  Info.Pred =
      static_cast<CmpInst::Predicate>(FCmp.getOperand(1).getPredicate());
  Info.LHS = FCmp.getOperand(2).getReg();
  Info.RHS = FCmp.getOperand(3).getReg();
  Register True = MI.getOperand(2).getReg();
  Register False = MI.getOperand(3).getReg();

  // TODO: Handle case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if ((Info.LHS != True || Info.RHS != False) &&
      (Info.LHS != False || Info.RHS != True))
    return false;

  // Invert the predicate if necessary so that the apply function can assume
  // that the select operands are the same as the fcmp operands.
  // (select (fcmp P, L, R), R, L) -> (select (fcmp !P, L, R), L, R)
  if (Info.LHS != True)
    Info.Pred = CmpInst::getInversePredicate(Info.Pred);

  // Only match </<=/>=/> not ==/!= etc.
  return Info.Pred != CmpInst::getSwappedPredicate(Info.Pred);
}

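// Emit G_AMDGPU_FMIN_LEGACY/G_AMDGPU_FMAX_LEGACY for the matched select,
// swapping the operands for unordered predicates to keep the select's NaN
// behavior.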
void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  unsigned Opc = (Info.Pred & CmpInst::FCMP_OGT) ? AMDGPU::G_AMDGPU_FMAX_LEGACY
                                                 : AMDGPU::G_AMDGPU_FMIN_LEGACY;
  Register X = Info.LHS;
  Register Y = Info.RHS;
  if (Info.Pred == CmpInst::getUnorderedPredicate(Info.Pred)) {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with NaN,
    // so permute it based on the compare type the hardware uses.
    std::swap(X, Y);
  }

  B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());

  MI.eraseFromParent();
}

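// Match an integer-to-FP conversion producing f32 or f16 whose source has all
// bits above the low byte known to be zero, so it can be lowered to
// G_AMDGPU_CVT_F32_UBYTE0.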
bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

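// Replace the conversion with G_AMDGPU_CVT_F32_UBYTE0, adding an FP truncate
// when the original result type was f16.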
void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

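// Fold rcp(sqrt(x)) and sqrt(rcp(x)) into a single amdgcn_rsq intrinsic when
// both instructions carry the contract fast-math flag.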
bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }
  return false;
}

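// Match fdiv(y, sqrt(x)) for the f16 case handled by this rule; only fire when
// the sqrt has a single non-debug use.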
bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
    MachineInstr &MI) const {
  Register Sqrt = MI.getOperand(2).getReg();
  return MRI.hasOneNonDBGUse(Sqrt);
}

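// Rewrite y / sqrt(x) as rsq(x) * y.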
void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
    MachineInstr &MI, const Register &X) const {
  Register Dst = MI.getOperand(0).getReg();
  Register Y = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  uint32_t Flags = MI.getFlags();
  Register RSQ = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {DstTy})
                     .addUse(X)
                     .setMIFlags(Flags)
                     .getReg(0);
  B.buildFMul(Dst, RSQ, Y, Flags);
  MI.eraseFromParent();
}

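// Fold a constant left/right shift of the source (possibly seen through a
// G_ZEXT) into the byte index of a G_AMDGPU_CVT_F32_UBYTEn instruction.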
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

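// Rebuild the conversion with the combined byte index, any-extending the
// source to 32 bits if needed.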
void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

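// A G_FCANONICALIZE of a value that is already canonical can be replaced by
// its source.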
bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.

// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  Register LoadReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LoadReg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
  int64_t Width = MI.getOperand(2).getImm();
  switch (LoadMI->getOpcode()) {
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
    return Width == 16;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT};
    return Width == 16;
  }
  return false;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  auto [LoadMI, NewOpcode] = MatchData;
  LoadMI->setDesc(TII.get(NewOpcode));
  // Update the destination register of the load with the destination register
  // of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  LoadMI->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}

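// Choose a 32-bit multiply pseudo for a 64-bit G_MUL whose operands are known
// to be zero-extended (at least 32 leading zero bits) or sign-extended (at
// least 33 sign bits) 32-bit values.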
bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  if (MRI.getType(Src0) != LLT::scalar(64))
    return false;

  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (KB->computeNumSignBits(Src1) >= 33 &&
      KB->computeNumSignBits(Src0) >= 33) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }
  return false;
}

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr
                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());

  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm