xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp (revision 9c77fb6aaa366cbabc80ee1b834bcfe4df135491)
1 //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Add CF_ALU. R600 ALU instructions are grouped in clauses, each of which can
/// hold up to 128 ALU instructions; these instructions can access up to 4
/// prefetched lines of 16 registers from constant buffers. Such ALU clauses
/// are initiated by CF_ALU instructions.
14 //===----------------------------------------------------------------------===//
15 
16 #include "MCTargetDesc/R600MCTargetDesc.h"
17 #include "R600.h"
18 #include "R600Defines.h"
19 #include "R600Subtarget.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 
22 using namespace llvm;
23 
24 namespace {
25 
26 class R600EmitClauseMarkers : public MachineFunctionPass {
27 private:
  // Target instruction info; null until runOnMachineFunction() fetches it
  // from the function's R600 subtarget.
  const R600InstrInfo *TII = nullptr;
  // Running counter written to the ADDR operand of each emitted clause marker
  // and post-incremented every time one is built (see MakeALUClause).
  int Address = 0;
30 
31   unsigned OccupiedDwords(MachineInstr &MI) const {
32     switch (MI.getOpcode()) {
33     case R600::INTERP_PAIR_XY:
34     case R600::INTERP_PAIR_ZW:
35     case R600::INTERP_VEC_LOAD:
36     case R600::DOT_4:
37       return 4;
38     case R600::KILL:
39       return 0;
40     default:
41       break;
42     }
43 
44     // These will be expanded to two ALU instructions in the
45     // ExpandSpecialInstructions pass.
46     if (TII->isLDSRetInstr(MI.getOpcode()))
47       return 2;
48 
49     if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
50         TII->isReductionOp(MI.getOpcode()))
51       return 4;
52 
53     unsigned NumLiteral = 0;
54     for (MachineInstr::mop_iterator It = MI.operands_begin(),
55                                     E = MI.operands_end();
56          It != E; ++It) {
57       MachineOperand &MO = *It;
58       if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
59         ++NumLiteral;
60     }
61     return 1 + NumLiteral;
62   }
63 
64   bool isALU(const MachineInstr &MI) const {
65     if (TII->isALUInstr(MI.getOpcode()))
66       return true;
67     if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
68       return true;
69     switch (MI.getOpcode()) {
70     case R600::PRED_X:
71     case R600::INTERP_PAIR_XY:
72     case R600::INTERP_PAIR_ZW:
73     case R600::INTERP_VEC_LOAD:
74     case R600::COPY:
75     case R600::DOT_4:
76       return true;
77     default:
78       return false;
79     }
80   }
81 
82   bool IsTrivialInst(MachineInstr &MI) const {
83     switch (MI.getOpcode()) {
84     case R600::KILL:
85     case R600::RETURN:
86     case R600::IMPLICIT_DEF:
87       return true;
88     default:
89       return false;
90     }
91   }
92 
93   std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
94     // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
95     // (See also R600ISelLowering.cpp)
96     // ConstIndex value is in [0, 4095];
97     return std::pair<unsigned, unsigned>(
98         ((Sel >> 2) - 512) >> 12, // KC_BANK
99         // Line Number of ConstIndex
100         // A line contains 16 constant registers however KCX bank can lock
101         // two line at the same time ; thus we want to get an even line number.
102         // Line number can be retrieved with (>>4), using (>>5) <<1 generates
103         // an even number.
104         ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
105   }
106 
107   bool
108   SubstituteKCacheBank(MachineInstr &MI,
109                        std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
110                        bool UpdateInstr = true) const {
111     std::vector<std::pair<unsigned, unsigned>> UsedKCache;
112 
113     if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
114       return true;
115 
116     const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
117         TII->getSrcs(MI);
118     assert(
119         (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
120         "Can't assign Const");
121     for (auto &[Op, Sel] : Consts) {
122       if (Op->getReg() != R600::ALU_CONST)
123         continue;
124       unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
125       unsigned KCacheIndex = Index * 4 + Chan;
126       const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
127       if (CachedConsts.empty()) {
128         CachedConsts.push_back(BankLine);
129         UsedKCache.emplace_back(0, KCacheIndex);
130         continue;
131       }
132       if (CachedConsts[0] == BankLine) {
133         UsedKCache.emplace_back(0, KCacheIndex);
134         continue;
135       }
136       if (CachedConsts.size() == 1) {
137         CachedConsts.push_back(BankLine);
138         UsedKCache.emplace_back(1, KCacheIndex);
139         continue;
140       }
141       if (CachedConsts[1] == BankLine) {
142         UsedKCache.emplace_back(1, KCacheIndex);
143         continue;
144       }
145       return false;
146     }
147 
148     if (!UpdateInstr)
149       return true;
150 
151     unsigned j = 0;
152     for (auto &[Op, Sel] : Consts) {
153       if (Op->getReg() != R600::ALU_CONST)
154         continue;
155       switch (UsedKCache[j].first) {
156       case 0:
157         Op->setReg(R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
158         break;
159       case 1:
160         Op->setReg(R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
161         break;
162       default:
163         llvm_unreachable("Wrong Cache Line");
164       }
165       j++;
166     }
167     return true;
168   }
169 
170   bool canClauseLocalKillFitInClause(
171                         unsigned AluInstCount,
172                         std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
173                         MachineBasicBlock::iterator Def,
174                         MachineBasicBlock::iterator BBEnd) {
175     const R600RegisterInfo &TRI = TII->getRegisterInfo();
176     //TODO: change this to defs?
177     for (MachineOperand &MO : Def->all_defs()) {
178       if (TRI.isPhysRegLiveAcrossClauses(MO.getReg()))
179         continue;
180 
181       // Def defines a clause local register, so check that its use will fit
182       // in the clause.
183       unsigned LastUseCount = 0;
184       for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
185         AluInstCount += OccupiedDwords(*UseI);
186         // Make sure we won't need to end the clause due to KCache limitations.
187         if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
188           return false;
189 
190         // We have reached the maximum instruction limit before finding the
191         // use that kills this register, so we cannot use this def in the
192         // current clause.
193         if (AluInstCount >= TII->getMaxAlusPerClause())
194           return false;
195 
196         // TODO: Is this true? kill flag appears to work OK below
197         // Register kill flags have been cleared by the time we get to this
198         // pass, but it is safe to assume that all uses of this register
199         // occur in the same basic block as its definition, because
200         // it is illegal for the scheduler to schedule them in
201         // different blocks.
202         if (UseI->readsRegister(MO.getReg(), &TRI))
203           LastUseCount = AluInstCount;
204 
205         // Exit early if the current use kills the register
206         if (UseI != Def && UseI->killsRegister(MO.getReg(), &TRI))
207           break;
208       }
209       if (LastUseCount)
210         return LastUseCount <= TII->getMaxAlusPerClause();
211       llvm_unreachable("Clause local register live at end of clause.");
212     }
213     return true;
214   }
215 
216   MachineBasicBlock::iterator
217   MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
218     MachineBasicBlock::iterator ClauseHead = I;
219     std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
220     bool PushBeforeModifier = false;
221     unsigned AluInstCount = 0;
222     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
223       if (IsTrivialInst(*I))
224         continue;
225       if (!isALU(*I))
226         break;
227       if (AluInstCount > TII->getMaxAlusPerClause())
228         break;
229       if (I->getOpcode() == R600::PRED_X) {
230         // We put PRED_X in its own clause to ensure that ifcvt won't create
231         // clauses with more than 128 insts.
232         // IfCvt is indeed checking that "then" and "else" branches of an if
233         // statement have less than ~60 insts thus converted clauses can't be
234         // bigger than ~121 insts (predicate setter needs to be in the same
235         // clause as predicated alus).
236         if (AluInstCount > 0)
237           break;
238         if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
239           PushBeforeModifier = true;
240         AluInstCount ++;
241         continue;
242       }
243       // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
244       //
245       // * KILL or INTERP instructions
246       // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
247       // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
248       //
249       // XXX: These checks have not been implemented yet.
250       if (TII->mustBeLastInClause(I->getOpcode())) {
251         I++;
252         break;
253       }
254 
255       // If this instruction defines a clause local register, make sure
256       // its use can fit in this clause.
257       if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
258         break;
259 
260       if (!SubstituteKCacheBank(*I, KCacheBanks))
261         break;
262       AluInstCount += OccupiedDwords(*I);
263     }
264     unsigned Opcode = PushBeforeModifier ?
265         R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
266     BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
267     // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
268     // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
269     // pass may assume that identical ALU clause starter at the beginning of a
270     // true and false branch can be factorized which is not the case.
271         .addImm(Address++) // ADDR
272         .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
273         .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
274         .addImm(KCacheBanks.empty()?0:2) // KM0
275         .addImm((KCacheBanks.size() < 2)?0:2) // KM1
276         .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
277         .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
278         .addImm(AluInstCount) // COUNT
279         .addImm(1); // Enabled
280     return I;
281   }
282 
283 public:
  // Pass identification member; it is handed to the MachineFunctionPass
  // constructor below.
  static char ID;

  // Construct the pass, forwarding ID to the MachineFunctionPass base class.
  R600EmitClauseMarkers() : MachineFunctionPass(ID) {}
287 
288   bool runOnMachineFunction(MachineFunction &MF) override {
289     const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
290     TII = ST.getInstrInfo();
291 
292     for (MachineBasicBlock &MBB : MF) {
293       MachineBasicBlock::iterator I = MBB.begin();
294       if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
295         continue; // BB was already parsed
296       for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
297         if (isALU(*I)) {
298           auto next = MakeALUClause(MBB, I);
299           assert(next != I);
300           I = next;
301         } else
302           ++I;
303       }
304     }
305     return false;
306   }
307 
308   StringRef getPassName() const override {
309     return "R600 Emit Clause Markers Pass";
310   }
311 };
312 
// Out-of-class definition of the pass ID member declared in the class.
char R600EmitClauseMarkers::ID = 0;
314 
315 } // end anonymous namespace
316 
// Pass initialization boilerplate: both macros receive the argument string
// "emitclausemarkers" and the description "R600 Emit Clause Markers". No
// dependency macros appear between BEGIN and END.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags — confirm against PassSupport.h.
INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
                      "R600 Emit Clause Markers", false, false)
INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
                    "R600 Emit Clause Markers", false, false)
321 
322 FunctionPass *llvm::createR600EmitClauseMarkers() {
323   return new R600EmitClauseMarkers();
324 }
325