xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- AMDGPURegBankSelect.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
/// Assign register banks to all register operands of G_ instructions using
/// machine uniformity analysis.
/// Sgpr - uniform values and some lane masks
/// Vgpr - divergent, non-S1 values
/// Vcc  - divergent S1 values (lane masks)
/// However, in some cases G_ instructions with this register bank assignment
/// can't be inst-selected. This is solved in AMDGPURegBankLegalize.
16 //===----------------------------------------------------------------------===//
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUGlobalISelUtils.h"
20 #include "GCNSubtarget.h"
21 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
22 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
23 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/InitializePasses.h"
26 
27 #define DEBUG_TYPE "amdgpu-regbankselect"
28 
29 using namespace llvm;
30 using namespace AMDGPU;
31 
32 namespace {
33 
34 class AMDGPURegBankSelect : public MachineFunctionPass {
35 public:
36   static char ID;
37 
AMDGPURegBankSelect()38   AMDGPURegBankSelect() : MachineFunctionPass(ID) {}
39 
40   bool runOnMachineFunction(MachineFunction &MF) override;
41 
getPassName() const42   StringRef getPassName() const override {
43     return "AMDGPU Register Bank Select";
44   }
45 
getAnalysisUsage(AnalysisUsage & AU) const46   void getAnalysisUsage(AnalysisUsage &AU) const override {
47     AU.addRequired<TargetPassConfig>();
48     AU.addRequired<GISelCSEAnalysisWrapperPass>();
49     AU.addRequired<MachineUniformityAnalysisPass>();
50     MachineFunctionPass::getAnalysisUsage(AU);
51   }
52 
53   // This pass assigns register banks to all virtual registers, and we maintain
54   // this property in subsequent passes
getSetProperties() const55   MachineFunctionProperties getSetProperties() const override {
56     return MachineFunctionProperties().setRegBankSelected();
57   }
58 };
59 
60 } // End anonymous namespace.
61 
62 INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE,
63                       "AMDGPU Register Bank Select", false, false)
64 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
65 INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
66 INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
67 INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
68                     "AMDGPU Register Bank Select", false, false)
69 
70 char AMDGPURegBankSelect::ID = 0;
71 
72 char &llvm::AMDGPURegBankSelectID = AMDGPURegBankSelect::ID;
73 
createAMDGPURegBankSelectPass()74 FunctionPass *llvm::createAMDGPURegBankSelectPass() {
75   return new AMDGPURegBankSelect();
76 }
77 
78 class RegBankSelectHelper {
79   MachineIRBuilder &B;
80   MachineRegisterInfo &MRI;
81   AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
82   const MachineUniformityInfo &MUI;
83   const SIRegisterInfo &TRI;
84   const RegisterBank *SgprRB;
85   const RegisterBank *VgprRB;
86   const RegisterBank *VccRB;
87 
88 public:
RegBankSelectHelper(MachineIRBuilder & B,AMDGPU::IntrinsicLaneMaskAnalyzer & ILMA,const MachineUniformityInfo & MUI,const SIRegisterInfo & TRI,const RegisterBankInfo & RBI)89   RegBankSelectHelper(MachineIRBuilder &B,
90                       AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
91                       const MachineUniformityInfo &MUI,
92                       const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
93       : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI),
94         SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
95         VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
96         VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
97 
98   // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
99   // the cycle
100   // Note: uniformity analysis does not consider that registers with vgpr def
101   // are divergent (you can have uniform value in vgpr).
102   // - TODO: implicit use of $exec could be implemented as indicator that
103   //   instruction is divergent
isTemporalDivergenceCopy(Register Reg)104   bool isTemporalDivergenceCopy(Register Reg) {
105     MachineInstr *MI = MRI.getVRegDef(Reg);
106     if (!MI->isCopy() || MI->getNumImplicitOperands() != 1)
107       return false;
108 
109     return MI->implicit_operands().begin()->getReg() == TRI.getExec();
110   }
111 
getRegBankToAssign(Register Reg)112   const RegisterBank *getRegBankToAssign(Register Reg) {
113     if (!isTemporalDivergenceCopy(Reg) &&
114         (MUI.isUniform(Reg) || ILMA.isS32S64LaneMask(Reg)))
115       return SgprRB;
116     if (MRI.getType(Reg) == LLT::scalar(1))
117       return VccRB;
118     return VgprRB;
119   }
120 
121   // %rc:RegClass(s32) = G_ ...
122   // ...
123   // %a = G_ ..., %rc
124   // ->
125   // %rb:RegBank(s32) = G_ ...
126   // %rc:RegClass(s32) = COPY %rb
127   // ...
128   // %a = G_ ..., %rb
reAssignRegBankOnDef(MachineInstr & MI,MachineOperand & DefOP,const RegisterBank * RB)129   void reAssignRegBankOnDef(MachineInstr &MI, MachineOperand &DefOP,
130                             const RegisterBank *RB) {
131     // Register that already has Register class got it during pre-inst selection
132     // of another instruction. Maybe cross bank copy was required so we insert a
133     // copy that can be removed later. This simplifies post regbanklegalize
134     // combiner and avoids need to special case some patterns.
135     Register Reg = DefOP.getReg();
136     LLT Ty = MRI.getType(Reg);
137     Register NewReg = MRI.createVirtualRegister({RB, Ty});
138     DefOP.setReg(NewReg);
139 
140     auto &MBB = *MI.getParent();
141     B.setInsertPt(MBB, MBB.SkipPHIsAndLabels(std::next(MI.getIterator())));
142     B.buildCopy(Reg, NewReg);
143 
144     // The problem was discovered for uniform S1 that was used as both
145     // lane mask(vcc) and regular sgpr S1.
146     // - lane-mask(vcc) use was by si_if, this use is divergent and requires
147     //   non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
148     //   sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
149     // - the regular sgpr S1(uniform) instruction is now broken since
150     //   it uses sreg_64_xexec(S1) which is divergent.
151 
152     // Replace virtual registers with register class on generic instructions
153     // uses with virtual registers with register bank.
154     for (auto &UseMI : make_early_inc_range(MRI.use_instructions(Reg))) {
155       if (UseMI.isPreISelOpcode()) {
156         for (MachineOperand &Op : UseMI.operands()) {
157           if (Op.isReg() && Op.getReg() == Reg)
158             Op.setReg(NewReg);
159         }
160       }
161     }
162   }
163 
164   // %a = G_ ..., %rc
165   // ->
166   // %rb:RegBank(s32) = COPY %rc
167   // %a = G_ ..., %rb
constrainRegBankUse(MachineInstr & MI,MachineOperand & UseOP,const RegisterBank * RB)168   void constrainRegBankUse(MachineInstr &MI, MachineOperand &UseOP,
169                            const RegisterBank *RB) {
170     Register Reg = UseOP.getReg();
171 
172     LLT Ty = MRI.getType(Reg);
173     Register NewReg = MRI.createVirtualRegister({RB, Ty});
174     UseOP.setReg(NewReg);
175 
176     if (MI.isPHI()) {
177       auto DefMI = MRI.getVRegDef(Reg)->getIterator();
178       MachineBasicBlock *DefMBB = DefMI->getParent();
179       B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));
180     } else {
181       B.setInstr(MI);
182     }
183 
184     B.buildCopy(NewReg, Reg);
185   }
186 };
187 
getVReg(MachineOperand & Op)188 static Register getVReg(MachineOperand &Op) {
189   if (!Op.isReg())
190     return {};
191 
192   // Operands of COPY and G_SI_CALL can be physical registers.
193   Register Reg = Op.getReg();
194   if (!Reg.isVirtual())
195     return {};
196 
197   return Reg;
198 }
199 
runOnMachineFunction(MachineFunction & MF)200 bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
201   if (MF.getProperties().hasFailedISel())
202     return false;
203 
204   // Setup the instruction builder with CSE.
205   const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
206   GISelCSEAnalysisWrapper &Wrapper =
207       getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
208   GISelCSEInfo &CSEInfo = Wrapper.get(TPC.getCSEConfig());
209   GISelObserverWrapper Observer;
210   Observer.addObserver(&CSEInfo);
211 
212   CSEMIRBuilder B(MF);
213   B.setCSEInfo(&CSEInfo);
214   B.setChangeObserver(Observer);
215 
216   RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
217   RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);
218 
219   IntrinsicLaneMaskAnalyzer ILMA(MF);
220   MachineUniformityInfo &MUI =
221       getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
222   MachineRegisterInfo &MRI = *B.getMRI();
223   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
224   RegBankSelectHelper RBSHelper(B, ILMA, MUI, *ST.getRegisterInfo(),
225                                 *ST.getRegBankInfo());
226   // Virtual registers at this point don't have register banks.
227   // Virtual registers in def and use operands of already inst-selected
228   // instruction have register class.
229 
230   for (MachineBasicBlock &MBB : MF) {
231     for (MachineInstr &MI : MBB) {
232       // Vregs in def and use operands of COPY can have either register class
233       // or bank. If there is neither on vreg in def operand, assign bank.
234       if (MI.isCopy()) {
235         Register DefReg = getVReg(MI.getOperand(0));
236         if (!DefReg.isValid() || MRI.getRegClassOrNull(DefReg))
237           continue;
238 
239         assert(!MRI.getRegBankOrNull(DefReg));
240         MRI.setRegBank(DefReg, *RBSHelper.getRegBankToAssign(DefReg));
241         continue;
242       }
243 
244       if (!MI.isPreISelOpcode())
245         continue;
246 
247       // Vregs in def and use operands of G_ instructions need to have register
248       // banks assigned. Before this loop possible case are
249       // - (1) vreg without register class or bank in def or use operand
250       // - (2) vreg with register class in def operand
251       // - (3) vreg, defined by G_ instruction, in use operand
252       // - (4) vreg, defined by pre-inst-selected instruction, in use operand
253 
254       // First three cases are handled in loop through all def operands of G_
255       // instructions. For case (1) simply setRegBank. Cases (2) and (3) are
256       // handled by reAssignRegBankOnDef.
257       for (MachineOperand &DefOP : MI.defs()) {
258         Register DefReg = getVReg(DefOP);
259         if (!DefReg.isValid())
260           continue;
261 
262         const RegisterBank *RB = RBSHelper.getRegBankToAssign(DefReg);
263         if (MRI.getRegClassOrNull(DefReg))
264           RBSHelper.reAssignRegBankOnDef(MI, DefOP, RB);
265         else {
266           assert(!MRI.getRegBankOrNull(DefReg));
267           MRI.setRegBank(DefReg, *RB);
268         }
269       }
270 
271       // Register bank select doesn't modify pre-inst-selected instructions.
272       // For case (4) need to insert a copy, handled by constrainRegBankUse.
273       for (MachineOperand &UseOP : MI.uses()) {
274         Register UseReg = getVReg(UseOP);
275         if (!UseReg.isValid())
276           continue;
277 
278         // Skip case (3).
279         if (!MRI.getRegClassOrNull(UseReg) ||
280             MRI.getVRegDef(UseReg)->isPreISelOpcode())
281           continue;
282 
283         // Use with register class defined by pre-inst-selected instruction.
284         const RegisterBank *RB = RBSHelper.getRegBankToAssign(UseReg);
285         RBSHelper.constrainRegBankUse(MI, UseOP, RB);
286       }
287     }
288   }
289 
290   return true;
291 }
292