xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass implements instructions packetization for R600. It unsets isLast
11 /// bit of instructions inside a bundle and substitutes src register with
12 /// PreviousVector when applicable.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "MCTargetDesc/R600MCTargetDesc.h"
17 #include "R600.h"
18 #include "R600Subtarget.h"
19 #include "llvm/CodeGen/DFAPacketizer.h"
20 #include "llvm/CodeGen/MachineDominators.h"
21 #include "llvm/CodeGen/MachineLoopInfo.h"
22 #include "llvm/CodeGen/ScheduleDAG.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "packets"
27 
28 namespace {
29 
30 class R600Packetizer : public MachineFunctionPass {
31 
32 public:
33   static char ID;
34   R600Packetizer() : MachineFunctionPass(ID) {}
35 
36   void getAnalysisUsage(AnalysisUsage &AU) const override {
37     AU.setPreservesCFG();
38     AU.addRequired<MachineDominatorTreeWrapperPass>();
39     AU.addPreserved<MachineDominatorTreeWrapperPass>();
40     AU.addRequired<MachineLoopInfoWrapperPass>();
41     AU.addPreserved<MachineLoopInfoWrapperPass>();
42     MachineFunctionPass::getAnalysisUsage(AU);
43   }
44 
45   StringRef getPassName() const override { return "R600 Packetizer"; }
46 
47   bool runOnMachineFunction(MachineFunction &Fn) override;
48 };
49 
50 class R600PacketizerList : public VLIWPacketizerList {
51 private:
52   const R600InstrInfo *TII;
53   const R600RegisterInfo &TRI;
54   bool VLIW5;
55   bool ConsideredInstUsesAlreadyWrittenVectorElement;
56 
57   unsigned getSlot(const MachineInstr &MI) const {
58     return TRI.getHWRegChan(MI.getOperand(0).getReg());
59   }
60 
61   /// \returns register to PV chan mapping for bundle/single instructions that
62   /// immediately precedes I.
63   DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
64       const {
65     DenseMap<unsigned, unsigned> Result;
66     I--;
67     if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
68       return Result;
69     MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
70     if (I->isBundle())
71       BI++;
72     int LastDstChan = -1;
73     do {
74       bool isTrans = false;
75       int BISlot = getSlot(*BI);
76       if (LastDstChan >= BISlot)
77         isTrans = true;
78       LastDstChan = BISlot;
79       if (TII->isPredicated(*BI))
80         continue;
81       int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
82       if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
83         continue;
84       int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
85       if (DstIdx == -1) {
86         continue;
87       }
88       Register Dst = BI->getOperand(DstIdx).getReg();
89       if (isTrans || TII->isTransOnly(*BI)) {
90         Result[Dst] = R600::PS;
91         continue;
92       }
93       if (BI->getOpcode() == R600::DOT4_r600 ||
94           BI->getOpcode() == R600::DOT4_eg) {
95         Result[Dst] = R600::PV_X;
96         continue;
97       }
98       if (Dst == R600::OQAP) {
99         continue;
100       }
101       unsigned PVReg = 0;
102       switch (TRI.getHWRegChan(Dst)) {
103       case 0:
104         PVReg = R600::PV_X;
105         break;
106       case 1:
107         PVReg = R600::PV_Y;
108         break;
109       case 2:
110         PVReg = R600::PV_Z;
111         break;
112       case 3:
113         PVReg = R600::PV_W;
114         break;
115       default:
116         llvm_unreachable("Invalid Chan");
117       }
118       Result[Dst] = PVReg;
119     } while ((++BI)->isBundledWithPred());
120     return Result;
121   }
122 
123   void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
124       const {
125     unsigned Ops[] = {
126       R600::OpName::src0,
127       R600::OpName::src1,
128       R600::OpName::src2
129     };
130     for (unsigned Op : Ops) {
131       int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Op);
132       if (OperandIdx < 0)
133         continue;
134       Register Src = MI.getOperand(OperandIdx).getReg();
135       const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
136       if (It != PVs.end())
137         MI.getOperand(OperandIdx).setReg(It->second);
138     }
139   }
140 public:
141   // Ctor.
142   R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
143                      MachineLoopInfo &MLI)
144       : VLIWPacketizerList(MF, MLI, nullptr),
145         TII(ST.getInstrInfo()),
146         TRI(TII->getRegisterInfo()) {
147     VLIW5 = !ST.hasCaymanISA();
148   }
149 
150   // initPacketizerState - initialize some internal flags.
151   void initPacketizerState() override {
152     ConsideredInstUsesAlreadyWrittenVectorElement = false;
153   }
154 
155   // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
156   bool ignorePseudoInstruction(const MachineInstr &MI,
157                                const MachineBasicBlock *MBB) override {
158     return false;
159   }
160 
161   // isSoloInstruction - return true if instruction MI can not be packetized
162   // with any other instruction, which means that MI itself is a packet.
163   bool isSoloInstruction(const MachineInstr &MI) override {
164     if (TII->isVector(MI))
165       return true;
166     if (!TII->isALUInstr(MI.getOpcode()))
167       return true;
168     if (MI.getOpcode() == R600::GROUP_BARRIER)
169       return true;
170     // XXX: This can be removed once the packetizer properly handles all the
171     // LDS instruction group restrictions.
172     return TII->isLDSInstr(MI.getOpcode());
173   }
174 
175   // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
176   // together.
177   bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
178     MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
179     if (getSlot(*MII) == getSlot(*MIJ))
180       ConsideredInstUsesAlreadyWrittenVectorElement = true;
181     // Does MII and MIJ share the same pred_sel ?
182     int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
183         OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
184     Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(),
185       PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register();
186     if (PredI != PredJ)
187       return false;
188     if (SUJ->isSucc(SUI)) {
189       for (const SDep &Dep : SUJ->Succs) {
190         if (Dep.getSUnit() != SUI)
191           continue;
192         if (Dep.getKind() == SDep::Anti)
193           continue;
194         if (Dep.getKind() == SDep::Output)
195           if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
196             continue;
197         return false;
198       }
199     }
200 
201     bool ARDef =
202         TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ);
203     bool ARUse =
204         TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ);
205 
206     return !ARDef || !ARUse;
207   }
208 
209   // isLegalToPruneDependencies - Is it legal to prune dependency between SUI
210   // and SUJ.
211   bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
212     return false;
213   }
214 
215   void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
216     unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
217     MI->getOperand(LastOp).setImm(Bit);
218   }
219 
220   bool isBundlableWithCurrentPMI(MachineInstr &MI,
221                                  const DenseMap<unsigned, unsigned> &PV,
222                                  std::vector<R600InstrInfo::BankSwizzle> &BS,
223                                  bool &isTransSlot) {
224     isTransSlot = TII->isTransOnly(MI);
225     assert (!isTransSlot || VLIW5);
226 
227     // Is the dst reg sequence legal ?
228     if (!isTransSlot && !CurrentPacketMIs.empty()) {
229       if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
230         if (ConsideredInstUsesAlreadyWrittenVectorElement &&
231             !TII->isVectorOnly(MI) && VLIW5) {
232           isTransSlot = true;
233           LLVM_DEBUG({
234             dbgs() << "Considering as Trans Inst :";
235             MI.dump();
236           });
237         }
238         else
239           return false;
240       }
241     }
242 
243     // Are the Constants limitations met ?
244     CurrentPacketMIs.push_back(&MI);
245     if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
246       LLVM_DEBUG({
247         dbgs() << "Couldn't pack :\n";
248         MI.dump();
249         dbgs() << "with the following packets :\n";
250         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
251           CurrentPacketMIs[i]->dump();
252           dbgs() << "\n";
253         }
254         dbgs() << "because of Consts read limitations\n";
255       });
256       CurrentPacketMIs.pop_back();
257       return false;
258     }
259 
260     // Is there a BankSwizzle set that meet Read Port limitations ?
261     if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
262             PV, BS, isTransSlot)) {
263       LLVM_DEBUG({
264         dbgs() << "Couldn't pack :\n";
265         MI.dump();
266         dbgs() << "with the following packets :\n";
267         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
268           CurrentPacketMIs[i]->dump();
269           dbgs() << "\n";
270         }
271         dbgs() << "because of Read port limitations\n";
272       });
273       CurrentPacketMIs.pop_back();
274       return false;
275     }
276 
277     // We cannot read LDS source registers from the Trans slot.
278     if (isTransSlot && TII->readsLDSSrcReg(MI))
279       return false;
280 
281     CurrentPacketMIs.pop_back();
282     return true;
283   }
284 
285   MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override {
286     MachineBasicBlock::iterator FirstInBundle =
287         CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
288     const DenseMap<unsigned, unsigned> &PV =
289         getPreviousVector(FirstInBundle);
290     std::vector<R600InstrInfo::BankSwizzle> BS;
291     bool isTransSlot;
292 
293     if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
294       for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
295         MachineInstr *MI = CurrentPacketMIs[i];
296         unsigned Op = TII->getOperandIdx(MI->getOpcode(),
297             R600::OpName::bank_swizzle);
298         MI->getOperand(Op).setImm(BS[i]);
299       }
300       unsigned Op =
301           TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
302       MI.getOperand(Op).setImm(BS.back());
303       if (!CurrentPacketMIs.empty())
304         setIsLastBit(CurrentPacketMIs.back(), 0);
305       substitutePV(MI, PV);
306       MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
307       if (isTransSlot) {
308         endPacket(std::next(It)->getParent(), std::next(It));
309       }
310       return It;
311     }
312     endPacket(MI.getParent(), MI);
313     if (TII->isTransOnly(MI))
314       return MI;
315     return VLIWPacketizerList::addToPacket(MI);
316   }
317 };
318 
319 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
320   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
321   const R600InstrInfo *TII = ST.getInstrInfo();
322 
323   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
324 
325   // Instantiate the packetizer.
326   R600PacketizerList Packetizer(Fn, ST, MLI);
327 
328   // DFA state table should not be empty.
329   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
330   assert(Packetizer.getResourceTracker()->getInstrItins());
331 
332   if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
333     return false;
334 
335   //
336   // Loop over all basic blocks and remove KILL pseudo-instructions
337   // These instructions confuse the dependence analysis. Consider:
338   // D0 = ...   (Insn 0)
339   // R0 = KILL R0, D0 (Insn 1)
340   // R0 = ... (Insn 2)
341   // Here, Insn 1 will result in the dependence graph not emitting an output
342   // dependence between Insn 0 and Insn 2. This can lead to incorrect
343   // packetization
344   //
345   for (MachineBasicBlock &MBB : Fn) {
346     for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
347       if (MI.isKill() || MI.getOpcode() == R600::IMPLICIT_DEF ||
348           (MI.getOpcode() == R600::CF_ALU && !MI.getOperand(8).getImm()))
349         MBB.erase(MI);
350     }
351   }
352 
353   // Loop over all of the basic blocks.
354   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
355        MBB != MBBe; ++MBB) {
356     // Find scheduling regions and schedule / packetize each region.
357     unsigned RemainingCount = MBB->size();
358     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
359         RegionEnd != MBB->begin();) {
360       // The next region starts above the previous region. Look backward in the
361       // instruction stream until we find the nearest boundary.
362       MachineBasicBlock::iterator I = RegionEnd;
363       for(;I != MBB->begin(); --I, --RemainingCount) {
364         if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn))
365           break;
366       }
367       I = MBB->begin();
368 
369       // Skip empty scheduling regions.
370       if (I == RegionEnd) {
371         RegionEnd = std::prev(RegionEnd);
372         --RemainingCount;
373         continue;
374       }
375       // Skip regions with one instruction.
376       if (I == std::prev(RegionEnd)) {
377         RegionEnd = std::prev(RegionEnd);
378         continue;
379       }
380 
381       Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
382       RegionEnd = I;
383     }
384   }
385 
386   return true;
387 
388 }
389 
390 } // end anonymous namespace
391 
392 INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE,
393                      "R600 Packetizer", false, false)
394 INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE,
395                     "R600 Packetizer", false, false)
396 
397 char R600Packetizer::ID = 0;
398 
399 char &llvm::R600PacketizerID = R600Packetizer::ID;
400 
401 llvm::FunctionPass *llvm::createR600Packetizer() {
402   return new R600Packetizer();
403 }
404