xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1*700637cbSDimitry Andric //===-- AMDGPURegBankLegalize.cpp -----------------------------------------===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric /// Lower G_ instructions that can't be inst-selected with register bank
10*700637cbSDimitry Andric /// assignment from AMDGPURegBankSelect based on machine uniformity info.
11*700637cbSDimitry Andric /// Given types on all operands, some register bank assignments require lowering
12*700637cbSDimitry Andric /// while others do not.
13*700637cbSDimitry Andric /// Note: cases where all register bank assignments would require lowering are
14*700637cbSDimitry Andric /// lowered in legalizer.
15*700637cbSDimitry Andric /// For example vgpr S64 G_AND requires lowering to S32 while sgpr S64 does not.
16*700637cbSDimitry Andric /// Eliminate sgpr S1 by lowering to sgpr S32.
17*700637cbSDimitry Andric //
18*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
19*700637cbSDimitry Andric 
20*700637cbSDimitry Andric #include "AMDGPU.h"
21*700637cbSDimitry Andric #include "AMDGPUGlobalISelUtils.h"
22*700637cbSDimitry Andric #include "AMDGPURegBankLegalizeHelper.h"
23*700637cbSDimitry Andric #include "GCNSubtarget.h"
24*700637cbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
25*700637cbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
26*700637cbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
27*700637cbSDimitry Andric #include "llvm/CodeGen/MachineUniformityAnalysis.h"
28*700637cbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
29*700637cbSDimitry Andric #include "llvm/InitializePasses.h"
30*700637cbSDimitry Andric 
31*700637cbSDimitry Andric #define DEBUG_TYPE "amdgpu-regbanklegalize"
32*700637cbSDimitry Andric 
33*700637cbSDimitry Andric using namespace llvm;
34*700637cbSDimitry Andric using namespace AMDGPU;
35*700637cbSDimitry Andric 
36*700637cbSDimitry Andric namespace {
37*700637cbSDimitry Andric 
38*700637cbSDimitry Andric class AMDGPURegBankLegalize : public MachineFunctionPass {
39*700637cbSDimitry Andric public:
40*700637cbSDimitry Andric   static char ID;
41*700637cbSDimitry Andric 
42*700637cbSDimitry Andric public:
AMDGPURegBankLegalize()43*700637cbSDimitry Andric   AMDGPURegBankLegalize() : MachineFunctionPass(ID) {}
44*700637cbSDimitry Andric 
45*700637cbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
46*700637cbSDimitry Andric 
getPassName() const47*700637cbSDimitry Andric   StringRef getPassName() const override {
48*700637cbSDimitry Andric     return "AMDGPU Register Bank Legalize";
49*700637cbSDimitry Andric   }
50*700637cbSDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const51*700637cbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
52*700637cbSDimitry Andric     AU.addRequired<TargetPassConfig>();
53*700637cbSDimitry Andric     AU.addRequired<GISelCSEAnalysisWrapperPass>();
54*700637cbSDimitry Andric     AU.addRequired<MachineUniformityAnalysisPass>();
55*700637cbSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
56*700637cbSDimitry Andric   }
57*700637cbSDimitry Andric 
58*700637cbSDimitry Andric   // If there were no phis and we do waterfall expansion machine verifier would
59*700637cbSDimitry Andric   // fail.
getClearedProperties() const60*700637cbSDimitry Andric   MachineFunctionProperties getClearedProperties() const override {
61*700637cbSDimitry Andric     return MachineFunctionProperties().setNoPHIs();
62*700637cbSDimitry Andric   }
63*700637cbSDimitry Andric };
64*700637cbSDimitry Andric 
65*700637cbSDimitry Andric } // End anonymous namespace.
66*700637cbSDimitry Andric 
67*700637cbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPURegBankLegalize, DEBUG_TYPE,
68*700637cbSDimitry Andric                       "AMDGPU Register Bank Legalize", false, false)
69*700637cbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
70*700637cbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
71*700637cbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
72*700637cbSDimitry Andric INITIALIZE_PASS_END(AMDGPURegBankLegalize, DEBUG_TYPE,
73*700637cbSDimitry Andric                     "AMDGPU Register Bank Legalize", false, false)
74*700637cbSDimitry Andric 
75*700637cbSDimitry Andric char AMDGPURegBankLegalize::ID = 0;
76*700637cbSDimitry Andric 
77*700637cbSDimitry Andric char &llvm::AMDGPURegBankLegalizeID = AMDGPURegBankLegalize::ID;
78*700637cbSDimitry Andric 
createAMDGPURegBankLegalizePass()79*700637cbSDimitry Andric FunctionPass *llvm::createAMDGPURegBankLegalizePass() {
80*700637cbSDimitry Andric   return new AMDGPURegBankLegalize();
81*700637cbSDimitry Andric }
82*700637cbSDimitry Andric 
getRules(const GCNSubtarget & ST,MachineRegisterInfo & MRI)83*700637cbSDimitry Andric const RegBankLegalizeRules &getRules(const GCNSubtarget &ST,
84*700637cbSDimitry Andric                                      MachineRegisterInfo &MRI) {
85*700637cbSDimitry Andric   static std::mutex GlobalMutex;
86*700637cbSDimitry Andric   static SmallDenseMap<unsigned, std::unique_ptr<RegBankLegalizeRules>>
87*700637cbSDimitry Andric       CacheForRuleSet;
88*700637cbSDimitry Andric   std::lock_guard<std::mutex> Lock(GlobalMutex);
89*700637cbSDimitry Andric   auto [It, Inserted] = CacheForRuleSet.try_emplace(ST.getGeneration());
90*700637cbSDimitry Andric   if (Inserted)
91*700637cbSDimitry Andric     It->second = std::make_unique<RegBankLegalizeRules>(ST, MRI);
92*700637cbSDimitry Andric   else
93*700637cbSDimitry Andric     It->second->refreshRefs(ST, MRI);
94*700637cbSDimitry Andric   return *It->second;
95*700637cbSDimitry Andric }
96*700637cbSDimitry Andric 
97*700637cbSDimitry Andric class AMDGPURegBankLegalizeCombiner {
98*700637cbSDimitry Andric   MachineIRBuilder &B;
99*700637cbSDimitry Andric   MachineRegisterInfo &MRI;
100*700637cbSDimitry Andric   const SIRegisterInfo &TRI;
101*700637cbSDimitry Andric   const RegisterBank *SgprRB;
102*700637cbSDimitry Andric   const RegisterBank *VgprRB;
103*700637cbSDimitry Andric   const RegisterBank *VccRB;
104*700637cbSDimitry Andric 
105*700637cbSDimitry Andric   static constexpr LLT S1 = LLT::scalar(1);
106*700637cbSDimitry Andric   static constexpr LLT S16 = LLT::scalar(16);
107*700637cbSDimitry Andric   static constexpr LLT S32 = LLT::scalar(32);
108*700637cbSDimitry Andric   static constexpr LLT S64 = LLT::scalar(64);
109*700637cbSDimitry Andric 
110*700637cbSDimitry Andric public:
AMDGPURegBankLegalizeCombiner(MachineIRBuilder & B,const SIRegisterInfo & TRI,const RegisterBankInfo & RBI)111*700637cbSDimitry Andric   AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI,
112*700637cbSDimitry Andric                                 const RegisterBankInfo &RBI)
113*700637cbSDimitry Andric       : B(B), MRI(*B.getMRI()), TRI(TRI),
114*700637cbSDimitry Andric         SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
115*700637cbSDimitry Andric         VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
116*700637cbSDimitry Andric         VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {};
117*700637cbSDimitry Andric 
isLaneMask(Register Reg)118*700637cbSDimitry Andric   bool isLaneMask(Register Reg) {
119*700637cbSDimitry Andric     const RegisterBank *RB = MRI.getRegBankOrNull(Reg);
120*700637cbSDimitry Andric     if (RB && RB->getID() == AMDGPU::VCCRegBankID)
121*700637cbSDimitry Andric       return true;
122*700637cbSDimitry Andric 
123*700637cbSDimitry Andric     const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
124*700637cbSDimitry Andric     return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) == LLT::scalar(1);
125*700637cbSDimitry Andric   }
126*700637cbSDimitry Andric 
cleanUpAfterCombine(MachineInstr & MI,MachineInstr * Optional0)127*700637cbSDimitry Andric   void cleanUpAfterCombine(MachineInstr &MI, MachineInstr *Optional0) {
128*700637cbSDimitry Andric     MI.eraseFromParent();
129*700637cbSDimitry Andric     if (Optional0 && isTriviallyDead(*Optional0, MRI))
130*700637cbSDimitry Andric       Optional0->eraseFromParent();
131*700637cbSDimitry Andric   }
132*700637cbSDimitry Andric 
tryMatch(Register Src,unsigned Opcode)133*700637cbSDimitry Andric   std::pair<MachineInstr *, Register> tryMatch(Register Src, unsigned Opcode) {
134*700637cbSDimitry Andric     MachineInstr *MatchMI = MRI.getVRegDef(Src);
135*700637cbSDimitry Andric     if (MatchMI->getOpcode() != Opcode)
136*700637cbSDimitry Andric       return {nullptr, Register()};
137*700637cbSDimitry Andric     return {MatchMI, MatchMI->getOperand(1).getReg()};
138*700637cbSDimitry Andric   }
139*700637cbSDimitry Andric 
tryCombineCopy(MachineInstr & MI)140*700637cbSDimitry Andric   void tryCombineCopy(MachineInstr &MI) {
141*700637cbSDimitry Andric     Register Dst = MI.getOperand(0).getReg();
142*700637cbSDimitry Andric     Register Src = MI.getOperand(1).getReg();
143*700637cbSDimitry Andric     // Skip copies of physical registers.
144*700637cbSDimitry Andric     if (!Dst.isVirtual() || !Src.isVirtual())
145*700637cbSDimitry Andric       return;
146*700637cbSDimitry Andric 
147*700637cbSDimitry Andric     // This is a cross bank copy, sgpr S1 to lane mask.
148*700637cbSDimitry Andric     //
149*700637cbSDimitry Andric     // %Src:sgpr(s1) = G_TRUNC %TruncS32Src:sgpr(s32)
150*700637cbSDimitry Andric     // %Dst:lane-mask(s1) = COPY %Src:sgpr(s1)
151*700637cbSDimitry Andric     // ->
152*700637cbSDimitry Andric     // %Dst:lane-mask(s1) = G_AMDGPU_COPY_VCC_SCC %TruncS32Src:sgpr(s32)
153*700637cbSDimitry Andric     if (isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) {
154*700637cbSDimitry Andric       auto [Trunc, TruncS32Src] = tryMatch(Src, AMDGPU::G_TRUNC);
155*700637cbSDimitry Andric       assert(Trunc && MRI.getType(TruncS32Src) == S32 &&
156*700637cbSDimitry Andric              "sgpr S1 must be result of G_TRUNC of sgpr S32");
157*700637cbSDimitry Andric 
158*700637cbSDimitry Andric       B.setInstr(MI);
159*700637cbSDimitry Andric       // Ensure that truncated bits in BoolSrc are 0.
160*700637cbSDimitry Andric       auto One = B.buildConstant({SgprRB, S32}, 1);
161*700637cbSDimitry Andric       auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One);
162*700637cbSDimitry Andric       B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc});
163*700637cbSDimitry Andric       cleanUpAfterCombine(MI, Trunc);
164*700637cbSDimitry Andric       return;
165*700637cbSDimitry Andric     }
166*700637cbSDimitry Andric 
167*700637cbSDimitry Andric     // Src = G_AMDGPU_READANYLANE RALSrc
168*700637cbSDimitry Andric     // Dst = COPY Src
169*700637cbSDimitry Andric     // ->
170*700637cbSDimitry Andric     // Dst = RALSrc
171*700637cbSDimitry Andric     if (MRI.getRegBankOrNull(Dst) == VgprRB &&
172*700637cbSDimitry Andric         MRI.getRegBankOrNull(Src) == SgprRB) {
173*700637cbSDimitry Andric       auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
174*700637cbSDimitry Andric       if (!RAL)
175*700637cbSDimitry Andric         return;
176*700637cbSDimitry Andric 
177*700637cbSDimitry Andric       assert(MRI.getRegBank(RALSrc) == VgprRB);
178*700637cbSDimitry Andric       MRI.replaceRegWith(Dst, RALSrc);
179*700637cbSDimitry Andric       cleanUpAfterCombine(MI, RAL);
180*700637cbSDimitry Andric       return;
181*700637cbSDimitry Andric     }
182*700637cbSDimitry Andric   }
183*700637cbSDimitry Andric 
tryCombineS1AnyExt(MachineInstr & MI)184*700637cbSDimitry Andric   void tryCombineS1AnyExt(MachineInstr &MI) {
185*700637cbSDimitry Andric     // %Src:sgpr(S1) = G_TRUNC %TruncSrc
186*700637cbSDimitry Andric     // %Dst = G_ANYEXT %Src:sgpr(S1)
187*700637cbSDimitry Andric     // ->
188*700637cbSDimitry Andric     // %Dst = G_... %TruncSrc
189*700637cbSDimitry Andric     Register Dst = MI.getOperand(0).getReg();
190*700637cbSDimitry Andric     Register Src = MI.getOperand(1).getReg();
191*700637cbSDimitry Andric     if (MRI.getType(Src) != S1)
192*700637cbSDimitry Andric       return;
193*700637cbSDimitry Andric 
194*700637cbSDimitry Andric     auto [Trunc, TruncSrc] = tryMatch(Src, AMDGPU::G_TRUNC);
195*700637cbSDimitry Andric     if (!Trunc)
196*700637cbSDimitry Andric       return;
197*700637cbSDimitry Andric 
198*700637cbSDimitry Andric     LLT DstTy = MRI.getType(Dst);
199*700637cbSDimitry Andric     LLT TruncSrcTy = MRI.getType(TruncSrc);
200*700637cbSDimitry Andric 
201*700637cbSDimitry Andric     if (DstTy == TruncSrcTy) {
202*700637cbSDimitry Andric       MRI.replaceRegWith(Dst, TruncSrc);
203*700637cbSDimitry Andric       cleanUpAfterCombine(MI, Trunc);
204*700637cbSDimitry Andric       return;
205*700637cbSDimitry Andric     }
206*700637cbSDimitry Andric 
207*700637cbSDimitry Andric     B.setInstr(MI);
208*700637cbSDimitry Andric 
209*700637cbSDimitry Andric     if (DstTy == S32 && TruncSrcTy == S64) {
210*700637cbSDimitry Andric       auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc);
211*700637cbSDimitry Andric       MRI.replaceRegWith(Dst, Unmerge.getReg(0));
212*700637cbSDimitry Andric       cleanUpAfterCombine(MI, Trunc);
213*700637cbSDimitry Andric       return;
214*700637cbSDimitry Andric     }
215*700637cbSDimitry Andric 
216*700637cbSDimitry Andric     if (DstTy == S64 && TruncSrcTy == S32) {
217*700637cbSDimitry Andric       B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
218*700637cbSDimitry Andric                             {TruncSrc, B.buildUndef({SgprRB, S32})});
219*700637cbSDimitry Andric       cleanUpAfterCombine(MI, Trunc);
220*700637cbSDimitry Andric       return;
221*700637cbSDimitry Andric     }
222*700637cbSDimitry Andric 
223*700637cbSDimitry Andric     if (DstTy == S32 && TruncSrcTy == S16) {
224*700637cbSDimitry Andric       B.buildAnyExt(Dst, TruncSrc);
225*700637cbSDimitry Andric       cleanUpAfterCombine(MI, Trunc);
226*700637cbSDimitry Andric       return;
227*700637cbSDimitry Andric     }
228*700637cbSDimitry Andric 
229*700637cbSDimitry Andric     if (DstTy == S16 && TruncSrcTy == S32) {
230*700637cbSDimitry Andric       B.buildTrunc(Dst, TruncSrc);
231*700637cbSDimitry Andric       cleanUpAfterCombine(MI, Trunc);
232*700637cbSDimitry Andric       return;
233*700637cbSDimitry Andric     }
234*700637cbSDimitry Andric 
235*700637cbSDimitry Andric     llvm_unreachable("missing anyext + trunc combine");
236*700637cbSDimitry Andric   }
237*700637cbSDimitry Andric };
238*700637cbSDimitry Andric 
239*700637cbSDimitry Andric // Search through MRI for virtual registers with sgpr register bank and S1 LLT.
getAnySgprS1(const MachineRegisterInfo & MRI)240*700637cbSDimitry Andric [[maybe_unused]] static Register getAnySgprS1(const MachineRegisterInfo &MRI) {
241*700637cbSDimitry Andric   const LLT S1 = LLT::scalar(1);
242*700637cbSDimitry Andric   for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
243*700637cbSDimitry Andric     Register Reg = Register::index2VirtReg(i);
244*700637cbSDimitry Andric     if (MRI.def_empty(Reg) || MRI.getType(Reg) != S1)
245*700637cbSDimitry Andric       continue;
246*700637cbSDimitry Andric 
247*700637cbSDimitry Andric     const RegisterBank *RB = MRI.getRegBankOrNull(Reg);
248*700637cbSDimitry Andric     if (RB && RB->getID() == AMDGPU::SGPRRegBankID) {
249*700637cbSDimitry Andric       LLVM_DEBUG(dbgs() << "Warning: detected sgpr S1 register in: ";
250*700637cbSDimitry Andric                  MRI.getVRegDef(Reg)->dump(););
251*700637cbSDimitry Andric       return Reg;
252*700637cbSDimitry Andric     }
253*700637cbSDimitry Andric   }
254*700637cbSDimitry Andric 
255*700637cbSDimitry Andric   return {};
256*700637cbSDimitry Andric }
257*700637cbSDimitry Andric 
runOnMachineFunction(MachineFunction & MF)258*700637cbSDimitry Andric bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
259*700637cbSDimitry Andric   if (MF.getProperties().hasFailedISel())
260*700637cbSDimitry Andric     return false;
261*700637cbSDimitry Andric 
262*700637cbSDimitry Andric   // Setup the instruction builder with CSE.
263*700637cbSDimitry Andric   const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
264*700637cbSDimitry Andric   GISelCSEAnalysisWrapper &Wrapper =
265*700637cbSDimitry Andric       getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
266*700637cbSDimitry Andric   GISelCSEInfo &CSEInfo = Wrapper.get(TPC.getCSEConfig());
267*700637cbSDimitry Andric   GISelObserverWrapper Observer;
268*700637cbSDimitry Andric   Observer.addObserver(&CSEInfo);
269*700637cbSDimitry Andric 
270*700637cbSDimitry Andric   CSEMIRBuilder B(MF);
271*700637cbSDimitry Andric   B.setCSEInfo(&CSEInfo);
272*700637cbSDimitry Andric   B.setChangeObserver(Observer);
273*700637cbSDimitry Andric 
274*700637cbSDimitry Andric   RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
275*700637cbSDimitry Andric   RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);
276*700637cbSDimitry Andric 
277*700637cbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
278*700637cbSDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
279*700637cbSDimitry Andric   const RegisterBankInfo &RBI = *ST.getRegBankInfo();
280*700637cbSDimitry Andric   const MachineUniformityInfo &MUI =
281*700637cbSDimitry Andric       getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
282*700637cbSDimitry Andric 
283*700637cbSDimitry Andric   // RegBankLegalizeRules is initialized with assigning sets of IDs to opcodes.
284*700637cbSDimitry Andric   const RegBankLegalizeRules &RBLRules = getRules(ST, MRI);
285*700637cbSDimitry Andric 
286*700637cbSDimitry Andric   // Logic that does legalization based on IDs assigned to Opcode.
287*700637cbSDimitry Andric   RegBankLegalizeHelper RBLHelper(B, MUI, RBI, RBLRules);
288*700637cbSDimitry Andric 
289*700637cbSDimitry Andric   SmallVector<MachineInstr *> AllInst;
290*700637cbSDimitry Andric 
291*700637cbSDimitry Andric   for (MachineBasicBlock &MBB : MF) {
292*700637cbSDimitry Andric     for (MachineInstr &MI : MBB) {
293*700637cbSDimitry Andric       AllInst.push_back(&MI);
294*700637cbSDimitry Andric     }
295*700637cbSDimitry Andric   }
296*700637cbSDimitry Andric 
297*700637cbSDimitry Andric   for (MachineInstr *MI : AllInst) {
298*700637cbSDimitry Andric     if (!MI->isPreISelOpcode())
299*700637cbSDimitry Andric       continue;
300*700637cbSDimitry Andric 
301*700637cbSDimitry Andric     unsigned Opc = MI->getOpcode();
302*700637cbSDimitry Andric     // Insert point for use operands needs some calculation.
303*700637cbSDimitry Andric     if (Opc == AMDGPU::G_PHI) {
304*700637cbSDimitry Andric       RBLHelper.applyMappingPHI(*MI);
305*700637cbSDimitry Andric       continue;
306*700637cbSDimitry Andric     }
307*700637cbSDimitry Andric 
308*700637cbSDimitry Andric     // Opcodes that support pretty much all combinations of reg banks and LLTs
309*700637cbSDimitry Andric     // (except S1). There is no point in writing rules for them.
310*700637cbSDimitry Andric     if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES ||
311*700637cbSDimitry Andric         Opc == AMDGPU::G_MERGE_VALUES || Opc == AMDGPU::G_BITCAST) {
312*700637cbSDimitry Andric       RBLHelper.applyMappingTrivial(*MI);
313*700637cbSDimitry Andric       continue;
314*700637cbSDimitry Andric     }
315*700637cbSDimitry Andric 
316*700637cbSDimitry Andric     // Opcodes that also support S1.
317*700637cbSDimitry Andric     if (Opc == G_FREEZE &&
318*700637cbSDimitry Andric         MRI.getType(MI->getOperand(0).getReg()) != LLT::scalar(1)) {
319*700637cbSDimitry Andric       RBLHelper.applyMappingTrivial(*MI);
320*700637cbSDimitry Andric       continue;
321*700637cbSDimitry Andric     }
322*700637cbSDimitry Andric 
323*700637cbSDimitry Andric     if ((Opc == AMDGPU::G_CONSTANT || Opc == AMDGPU::G_FCONSTANT ||
324*700637cbSDimitry Andric          Opc == AMDGPU::G_IMPLICIT_DEF)) {
325*700637cbSDimitry Andric       Register Dst = MI->getOperand(0).getReg();
326*700637cbSDimitry Andric       // Non S1 types are trivially accepted.
327*700637cbSDimitry Andric       if (MRI.getType(Dst) != LLT::scalar(1)) {
328*700637cbSDimitry Andric         assert(MRI.getRegBank(Dst)->getID() == AMDGPU::SGPRRegBankID);
329*700637cbSDimitry Andric         continue;
330*700637cbSDimitry Andric       }
331*700637cbSDimitry Andric 
332*700637cbSDimitry Andric       // S1 rules are in RegBankLegalizeRules.
333*700637cbSDimitry Andric     }
334*700637cbSDimitry Andric 
335*700637cbSDimitry Andric     RBLHelper.findRuleAndApplyMapping(*MI);
336*700637cbSDimitry Andric   }
337*700637cbSDimitry Andric 
338*700637cbSDimitry Andric   // Sgpr S1 clean up combines:
339*700637cbSDimitry Andric   // - Sgpr S1(S32) to sgpr S1(S32) Copy: anyext + trunc combine.
340*700637cbSDimitry Andric   //   In RegBankLegalize 'S1 Dst' are legalized into S32 as
341*700637cbSDimitry Andric   //   'S1Dst = Trunc S32Dst' and 'S1 Src' into 'S32Src = Anyext S1Src'.
342*700637cbSDimitry Andric   //   S1 Truncs and Anyexts that come from legalizer, that can have non-S32
343*700637cbSDimitry Andric   //   types e.g. S16 = Anyext S1 or S1 = Trunc S64, will also be cleaned up.
344*700637cbSDimitry Andric   // - Sgpr S1(S32) to vcc Copy: G_AMDGPU_COPY_VCC_SCC combine.
345*700637cbSDimitry Andric   //   Divergent instruction uses sgpr S1 as input that should be lane mask(vcc)
346*700637cbSDimitry Andric   //   Legalizing this use creates sgpr S1(S32) to vcc Copy.
347*700637cbSDimitry Andric 
348*700637cbSDimitry Andric   // Note: Remaining S1 copies, S1s are either sgpr S1(S32) or vcc S1:
349*700637cbSDimitry Andric   // - Vcc to vcc Copy: nothing to do here, just a regular copy.
350*700637cbSDimitry Andric   // - Vcc to sgpr S1 Copy: Should not exist in a form of COPY instruction(*).
351*700637cbSDimitry Andric   //   Note: For 'uniform-in-vcc to sgpr-S1 copy' G_AMDGPU_COPY_SCC_VCC is used
352*700637cbSDimitry Andric   //   instead. When only available instruction creates vcc result, use of
353*700637cbSDimitry Andric   //   UniformInVcc results in creating G_AMDGPU_COPY_SCC_VCC.
354*700637cbSDimitry Andric 
355*700637cbSDimitry Andric   // (*)Explanation for 'sgpr S1(uniform) = COPY vcc(divergent)':
356*700637cbSDimitry Andric   // Copy from divergent to uniform register indicates an error in either:
357*700637cbSDimitry Andric   // - Uniformity analysis: Uniform instruction has divergent input. If one of
358*700637cbSDimitry Andric   //   the inputs is divergent, instruction should be divergent!
359*700637cbSDimitry Andric   // - RegBankLegalizer not executing in waterfall loop (missing implementation)
360*700637cbSDimitry Andric 
361*700637cbSDimitry Andric   AMDGPURegBankLegalizeCombiner Combiner(B, *ST.getRegisterInfo(), RBI);
362*700637cbSDimitry Andric 
363*700637cbSDimitry Andric   for (MachineBasicBlock &MBB : MF) {
364*700637cbSDimitry Andric     for (MachineInstr &MI : make_early_inc_range(MBB)) {
365*700637cbSDimitry Andric       if (MI.getOpcode() == AMDGPU::COPY) {
366*700637cbSDimitry Andric         Combiner.tryCombineCopy(MI);
367*700637cbSDimitry Andric         continue;
368*700637cbSDimitry Andric       }
369*700637cbSDimitry Andric       if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
370*700637cbSDimitry Andric         Combiner.tryCombineS1AnyExt(MI);
371*700637cbSDimitry Andric         continue;
372*700637cbSDimitry Andric       }
373*700637cbSDimitry Andric     }
374*700637cbSDimitry Andric   }
375*700637cbSDimitry Andric 
376*700637cbSDimitry Andric   assert(!getAnySgprS1(MRI).isValid() &&
377*700637cbSDimitry Andric          "Registers with sgpr reg bank and S1 LLT are not legal after "
378*700637cbSDimitry Andric          "AMDGPURegBankLegalize. Should lower to sgpr S32");
379*700637cbSDimitry Andric 
380*700637cbSDimitry Andric   return true;
381*700637cbSDimitry Andric }
382